diff --git a/hipamd/include/hcc_detail/hip_hcc.h b/hipamd/include/hcc_detail/hip_hcc.h index ee8692769d..8202b279b6 100644 --- a/hipamd/include/hcc_detail/hip_hcc.h +++ b/hipamd/include/hcc_detail/hip_hcc.h @@ -69,7 +69,7 @@ extern int HIP_DISABLE_HW_COPY_DEP; extern thread_local int tls_defaultDevice; extern thread_local hipError_t tls_lastHipError; class ihipStream_t; -class ihipDevice_t; +class ihipCtx_t; // Color defs for debug messages: @@ -210,6 +210,10 @@ static const char *dbName [] = #define tprintf(trace_level, ...) #endif + + + + class ihipException : public std::exception { public: @@ -393,7 +397,7 @@ typedef LockedAccessor LockedAccessor_StreamCrit_t; class ihipStream_t { public: typedef uint64_t SeqNum_t ; - ihipStream_t(ihipDevice_t *ctx, hc::accelerator_view av, unsigned int flags); + ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags); ~ihipStream_t(); // kind is hipMemcpyKind @@ -425,7 +429,7 @@ typedef uint64_t SeqNum_t ; //-- Non-racy accessors: // These functions access fields set at initialization time and are non-racy (so do not acquire mutex) - ihipDevice_t * getDevice() const; + ihipCtx_t * getDevice() const; public: @@ -449,7 +453,7 @@ private: unsigned resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem); void setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent); - ihipDevice_t *_ctx; // parent context that owns this stream. + ihipCtx_t *_ctx; // parent context that owns this stream. friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s); }; @@ -534,10 +538,10 @@ public: // "Allocate" a stream ID: ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; }; - bool isPeer(const ihipDevice_t *peer); // returns Trus if peer has access to memory physically located on this device. - bool addPeer(ihipDevice_t *peer); - bool removePeer(ihipDevice_t *peer); - void resetPeers(ihipDevice_t *thisDevice); + bool isPeer(const ihipCtx_t *peer); // returns Trus if peer has access to memory physically located on this device. + bool addPeer(ihipCtx_t *peer); + bool removePeer(ihipCtx_t *peer); + void resetPeers(ihipCtx_t *thisDevice); void addStream(ihipStream_t *stream); @@ -553,7 +557,7 @@ private: // These reflect the currently Enabled set of peers for this GPU: // Enabled peers have permissions to access the memory physically allocated on this device. - std::list _peers; // list of enabled peer devices. + std::list _peers; // list of enabled peer devices. uint32_t _peerCnt; // number of enabled peers hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.) private: @@ -570,15 +574,15 @@ typedef LockedAccessor LockedAccessor_DeviceCrit_t; //------------------------------------------------------------------------------------------------- // Functions which read or write the critical data are named locked_. -// ihipDevice_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function. +// ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function. // External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in // performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all. -class ihipDevice_t +class ihipCtx_t { public: // Functions: - ihipDevice_t() {}; // note: calls constructor for _criticalData + ihipCtx_t() {}; // note: calls constructor for _criticalData void init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags); - ~ihipDevice_t(); + ~ihipCtx_t(); void locked_addStream(ihipStream_t *s); void locked_removeStream(ihipStream_t *s); @@ -589,7 +593,7 @@ public: // Functions: ihipDeviceCritical_t &criticalData() { return _criticalData; }; // TODO, move private. Fix P2P. public: // Data, set at initialization: - unsigned _device_index; // index into g_devices. + unsigned _device_index; // device ID hipDeviceProp_t _props; // saved device properties. hc::accelerator _acc; @@ -619,19 +623,19 @@ private: // Critical data, protected with locked access: +//================================================================================================= // Global variable definition: extern std::once_flag hip_initialized; -extern ihipDevice_t *g_devices; // Array of all non-emulated (ie GPU) accelerators in the system. -extern bool g_visible_device; // Set the flag when HIP_VISIBLE_DEVICES is set extern unsigned g_deviceCnt; -extern std::vector g_hip_visible_devices; /* vector of integers that contains the visible device IDs */ extern hsa_agent_t g_cpu_agent ; // the CPU agent. + //================================================================================================= -void ihipInit(); -const char *ihipErrorString(hipError_t); -ihipDevice_t *ihipGetTlsDefaultDevice(); -ihipDevice_t *ihipGetDevice(int); -void ihipSetTs(hipEvent_t e); +// Extern functions: +extern void ihipInit(); +extern const char *ihipErrorString(hipError_t); +extern ihipCtx_t *ihipGetTlsDefaultCtx(); +extern ihipCtx_t *ihipGetDevice(int); +extern void ihipSetTs(hipEvent_t e); template hc::completion_future ihipMemcpyKernel(hipStream_t, T*, const T*, size_t); diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp index cfc285427c..bc8879a120 100644 --- a/hipamd/src/hip_device.cpp +++ b/hipamd/src/hip_device.cpp @@ -150,7 +150,7 @@ hipError_t hipDeviceSynchronize(void) { HIP_INIT_API(); - ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. + ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. return ihipLogStatus(hipSuccess); } @@ -164,16 +164,16 @@ hipError_t hipDeviceReset(void) { HIP_INIT_API(); - ihipDevice_t *device = ihipGetTlsDefaultDevice(); + auto *ctx = ihipGetTlsDefaultCtx(); // TODO-HCC // This function currently does a user-level cleanup of known resources. // It could benefit from KFD support to perform a more "nuclear" clean that would include any associated kernel resources and page table entries. - if (device) { - // Release device resources (streams and memory): - device->locked_reset(); + if (ctx) { + // Release ctx resources (streams and memory): + ctx->locked_reset(); } return ihipLogStatus(hipSuccess); @@ -188,7 +188,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) hipError_t e = hipSuccess; - ihipDevice_t * hipDevice = ihipGetDevice(device); + auto * hipDevice = ihipGetDevice(device); hipDeviceProp_t *prop = &hipDevice->_props; if (hipDevice) { switch (attr) { @@ -264,7 +264,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) hipError_t e; - ihipDevice_t * hipDevice = ihipGetDevice(device); + auto * hipDevice = ihipGetDevice(device); if (hipDevice) { // copy saved props *props = hipDevice->_props; @@ -283,7 +283,7 @@ hipError_t hipSetDeviceFlags( unsigned int flags) hipError_t e; - ihipDevice_t * hipDevice = ihipGetDevice(tls_defaultDevice); + auto * hipDevice = ihipGetTlsDefaultCtx(); if(hipDevice){ hipDevice->_device_flags = hipDevice->_device_flags | flags; e = hipSuccess; diff --git a/hipamd/src/hip_event.cpp b/hipamd/src/hip_event.cpp index 1514fc5868..acc872052c 100644 --- a/hipamd/src/hip_event.cpp +++ b/hipamd/src/hip_event.cpp @@ -79,8 +79,8 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) // If stream == NULL, wait on all queues. // TODO-HCC fix this - is this conservative or still uses device timestamps? // TODO-HCC can we use barrier or event marker to implement better solution? - ihipDevice_t *device = ihipGetTlsDefaultDevice(); - device->locked_syncDefaultStream(true); + ihipCtx_t *ctx = ihipGetTlsDefaultCtx(); + ctx->locked_syncDefaultStream(true); eh->_timestamp = hc::get_system_ticks(); eh->_state = hipEventStatusRecorded; @@ -130,8 +130,8 @@ hipError_t hipEventSynchronize(hipEvent_t event) // Created but not actually recorded on any device: return ihipLogStatus(hipSuccess); } else if (eh->_stream == NULL) { - ihipDevice_t *device = ihipGetTlsDefaultDevice(); - device->locked_syncDefaultStream(true); + auto *ctx = ihipGetTlsDefaultCtx(); + ctx->locked_syncDefaultStream(true); return ihipLogStatus(hipSuccess); } else { eh->_marker.wait((eh->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 3640cf45b8..30d0e301e6 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -84,10 +84,10 @@ thread_local hipError_t tls_lastHipError = hipSuccess; //================================================================================================= //Forward Declarations: //================================================================================================= -bool ihipIsValidDevice(unsigned deviceIndex); - std::once_flag hip_initialized; -ihipDevice_t *g_devices; + +// Array of primary contexts for each device: +ihipCtx_t *g_primaryCtxArray; ; bool g_visible_device = false; unsigned g_deviceCnt; std::vector g_hip_visible_devices; @@ -98,6 +98,46 @@ hsa_agent_t g_cpu_agent; //================================================================================================= // Implementation: //================================================================================================= +// static global functions: + +static inline bool ihipIsValidDevice(unsigned deviceIndex) +{ + // deviceIndex is unsigned so always > 0 + return (deviceIndex < g_deviceCnt); +} + +//--- +ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex) +{ + if (ihipIsValidDevice(deviceIndex)) { + return &g_primaryCtxArray[deviceIndex]; + } else { + return NULL; + } +}; + + +// FIXME- index the new g_deviceArray data structure +ihipCtx_t * ihipGetDevice(int deviceIndex) +{ + if (ihipIsValidDevice(deviceIndex)) { + return &g_primaryCtxArray[deviceIndex]; + } else { + return NULL; + } +} + + +//--- +ihipCtx_t *ihipGetTlsDefaultCtx() +{ + // If this is invalid, the TLS state is corrupt. + // This can fire if called before devices are initialized. + // TODO - consider replacing assert with error code + assert (ihipIsValidDevice(tls_defaultDevice)); + + return &g_primaryCtxArray[tls_defaultDevice]; +} //================================================================================================= @@ -128,7 +168,7 @@ ihipSignal_t::~ihipSignal_t() // ihipStream_t: //================================================================================================= //--- -ihipStream_t::ihipStream_t(ihipDevice_t *ctx, hc::accelerator_view av, unsigned int flags) : +ihipStream_t::ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags) : _id(0), // will be set by add function. _av(av), _flags(flags), @@ -216,14 +256,14 @@ template<> void ihipDeviceCriticalBase_t::recomputePeerAgents() { _peerCnt = 0; - std::for_each (_peers.begin(), _peers.end(), [this](ihipDevice_t* device) { - _peerAgents[_peerCnt++] = device->_hsa_agent; + std::for_each (_peers.begin(), _peers.end(), [this](ihipCtx_t* ctx) { + _peerAgents[_peerCnt++] = ctx->_hsa_agent; }); } template<> -bool ihipDeviceCriticalBase_t::isPeer(const ihipDevice_t *peer) +bool ihipDeviceCriticalBase_t::isPeer(const ihipCtx_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); return (match != std::end(_peers)); @@ -231,7 +271,7 @@ bool ihipDeviceCriticalBase_t::isPeer(const ihipDevice_t *peer) template<> -bool ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) +bool ihipDeviceCriticalBase_t::addPeer(ihipCtx_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); if (match == std::end(_peers)) { @@ -247,7 +287,7 @@ bool ihipDeviceCriticalBase_t::addPeer(ihipDevice_t *peer) template<> -bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) +bool ihipDeviceCriticalBase_t::removePeer(ihipCtx_t *peer) { auto match = std::find(_peers.begin(), _peers.end(), peer); if (match != std::end(_peers)) { @@ -262,7 +302,7 @@ bool ihipDeviceCriticalBase_t::removePeer(ihipDevice_t *peer) template<> -void ihipDeviceCriticalBase_t::resetPeers(ihipDevice_t *thisDevice) +void ihipDeviceCriticalBase_t::resetPeers(ihipCtx_t *thisDevice) { _peers.clear(); _peerCnt = 0; @@ -279,20 +319,10 @@ void ihipDeviceCriticalBase_t::addStream(ihipStream_t *stream) //------------------------------------------------------------------------------------------------- -//--- -//Flavor that takes device index. -ihipDevice_t * getDevice(unsigned deviceIndex) -{ - if (ihipIsValidDevice(deviceIndex)) { - return &g_devices[deviceIndex]; - } else { - return NULL; - } -}; //--- -ihipDevice_t * ihipStream_t::getDevice() const +ihipCtx_t * ihipStream_t::getDevice() const { return _ctx; }; @@ -496,7 +526,7 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t // //Reset the device - this is called from hipDeviceReset. //Device may be reset multiple times, and may be reset after init. -void ihipDevice_t::locked_reset() +void ihipCtx_t::locked_reset() { // Obtain mutex access to the device critical data, release by destructor LockedAccessor_DeviceCrit_t crit(_criticalData); @@ -535,7 +565,7 @@ void ihipDevice_t::locked_reset() //--- -void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags) +void ihipCtx_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags) { _device_index = device_index; _device_flags = flags; @@ -570,7 +600,7 @@ void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerat -ihipDevice_t::~ihipDevice_t() +ihipCtx_t::~ihipCtx_t() { if (_default_stream) { delete _default_stream; @@ -704,7 +734,7 @@ static hsa_status_t countGpuAgents(hsa_agent_t agent, void *data) { } // Internal version, -hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) +hipError_t ihipCtx_t::getProperties(hipDeviceProp_t* prop) { hipError_t e = hipSuccess; hsa_status_t err; @@ -881,7 +911,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) // Implement "default" stream syncronization // This waits for all other streams to drain before continuing. // If waitOnSelf is set, this additionally waits for the default stream to empty. -void ihipDevice_t::locked_syncDefaultStream(bool waitOnSelf) +void ihipCtx_t::locked_syncDefaultStream(bool waitOnSelf) { LockedAccessor_DeviceCrit_t crit(_criticalData); @@ -904,7 +934,7 @@ void ihipDevice_t::locked_syncDefaultStream(bool waitOnSelf) } //--- -void ihipDevice_t::locked_addStream(ihipStream_t *s) +void ihipCtx_t::locked_addStream(ihipStream_t *s) { LockedAccessor_DeviceCrit_t crit(_criticalData); @@ -912,7 +942,7 @@ void ihipDevice_t::locked_addStream(ihipStream_t *s) } //--- -void ihipDevice_t::locked_removeStream(ihipStream_t *s) +void ihipCtx_t::locked_removeStream(ihipStream_t *s) { LockedAccessor_DeviceCrit_t crit(_criticalData); @@ -922,7 +952,7 @@ void ihipDevice_t::locked_removeStream(ihipStream_t *s) //--- //Heavyweight synchronization that waits on all streams, ignoring hipStreamNonBlocking flag. -void ihipDevice_t::locked_waitAllStreams() +void ihipCtx_t::locked_waitAllStreams() { LockedAccessor_DeviceCrit_t crit(_criticalData); @@ -1115,7 +1145,7 @@ void ihipInit() throw ihipException(hipErrorRuntimeOther); } - g_devices = new ihipDevice_t[deviceCnt]; + g_primaryCtxArray = new ihipCtx_t[deviceCnt]; g_deviceCnt = 0; for (int i=0; i 0 - return (deviceIndex < g_deviceCnt); -} - -//--- -ihipDevice_t *ihipGetTlsDefaultDevice() -{ - // If this is invalid, the TLS state is corrupt. - // This can fire if called before devices are initialized. - // TODO - consider replacing assert with error code - assert (ihipIsValidDevice(tls_defaultDevice)); - - return &g_devices[tls_defaultDevice]; -} -//--- -ihipDevice_t *ihipGetDevice(int deviceId) -{ - if ((deviceId >= 0) && (deviceId < g_deviceCnt)) { - return &g_devices[deviceId]; - } else { - return NULL; - } - -} //--- // Get the stream to use for a command submission. @@ -1176,7 +1180,7 @@ ihipDevice_t *ihipGetDevice(int deviceId) hipStream_t ihipSyncAndResolveStream(hipStream_t stream) { if (stream == hipStreamNull ) { - ihipDevice_t *device = ihipGetTlsDefaultDevice(); + ihipCtx_t *device = ihipGetTlsDefaultCtx(); #ifndef HIP_API_PER_THREAD_DEFAULT_STREAM device->locked_syncDefaultStream(false); @@ -1424,7 +1428,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked, void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent) { // current* represents the device associated with the specified stream. - ihipDevice_t *streamDevice = this->getDevice(); + ihipCtx_t *streamDevice = this->getDevice(); hsa_agent_t streamAgent = streamDevice->_hsa_agent; // ROCR runtime logic is : @@ -1444,7 +1448,7 @@ void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind) { - ihipDevice_t *device = this->getDevice(); + ihipCtx_t *device = this->getDevice(); if (device == NULL) { throw ihipException(hipErrorInvalidDevice); } @@ -1470,7 +1474,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const #if USE_PEER_TO_PEER>=2 // TODO - consider refactor. Do we need to support simul access of enable/disable peers with access? LockedAccessor_DeviceCrit_t dcrit(device->criticalData()); - if (dcrit->isPeer(::getDevice(dstPtrInfo._appId)) && (dcrit->isPeer(::getDevice(srcPtrInfo._appId)))) { + if (dcrit->isPeer(ihipGetDevice(dstPtrInfo._appId)) && (dcrit->isPeer(ihipGetDevice(srcPtrInfo._appId)))) { copyEngineCanSeeSrcAndDest = true; } #endif @@ -1654,7 +1658,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig { LockedAccessor_StreamCrit_t crit(_criticalData); - ihipDevice_t *device = this->getDevice(); + ihipCtx_t *device = this->getDevice(); if (device == NULL) { throw ihipException(hipErrorInvalidDevice); @@ -1740,7 +1744,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc) { HIP_INIT_API(deviceId, acc); - ihipDevice_t *d = ihipGetDevice(deviceId); + ihipCtx_t *d = ihipGetDevice(deviceId); hipError_t err; if (d == NULL) { err = hipErrorInvalidDevice; @@ -1761,7 +1765,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a HIP_INIT_API(stream, av); if (stream == hipStreamNull ) { - ihipDevice_t *device = ihipGetTlsDefaultDevice(); + ihipCtx_t *device = ihipGetTlsDefaultCtx(); stream = device->_default_stream; } @@ -1775,7 +1779,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a // TODO - describe naming convention. ihip _. No accessors. No early returns from functions. Set status to success at top, only set error codes in implementation. No tabs. // Caps convention _ or camelCase // if { } -// Should use ihip* data structures inside code rather than app-facing hip. For example, use ihipDevice_t (rather than hipDevice_t), ihipStream_t (rather than hipStream_t). +// Should use ihip* data structures inside code rather than app-facing hip. For example, use ihipCtx_t (rather than hipDevice_t), ihipStream_t (rather than hipStream_t). // locked_ // TODO - describe MT strategy // diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 94442f4698..91a27ce08b 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -120,18 +120,18 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) hipError_t hip_status = hipSuccess; - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); - if (device) { + if (ctx) { const unsigned am_flags = 0; - *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + *ptr = hc::am_alloc(sizeBytes, ctx->_acc, am_flags); if (sizeBytes && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; } else { - hc::am_memtracker_update(*ptr, device->_device_index, 0); + hc::am_memtracker_update(*ptr, ctx->_device_index, 0); { - LockedAccessor_DeviceCrit_t crit(device->criticalData()); + LockedAccessor_DeviceCrit_t crit(ctx->criticalData()); if (crit->peerCnt()) { hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); } @@ -152,25 +152,25 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hipError_t hip_status = hipSuccess; - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); - if(device){ + if(ctx){ if(flags == hipHostMallocDefault){ - *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); + *ptr = hc::am_alloc(sizeBytes, ctx->_acc, amHostPinned); if(sizeBytes < 1 && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; }else{ - hc::am_memtracker_update(*ptr, device->_device_index, amHostPinned); + hc::am_memtracker_update(*ptr, ctx->_device_index, amHostPinned); } tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); } else if(flags & hipHostMallocMapped){ - *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); + *ptr = hc::am_alloc(sizeBytes, ctx->_acc, amHostPinned); if(sizeBytes && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; }else{ - hc::am_memtracker_update(*ptr, device->_device_index, flags); + hc::am_memtracker_update(*ptr, ctx->_device_index, flags); { - LockedAccessor_DeviceCrit_t crit(device->criticalData()); + LockedAccessor_DeviceCrit_t crit(ctx->criticalData()); if (crit->peerCnt()) { hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); } @@ -212,19 +212,19 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height *pitch = ((((int)width-1)/128) + 1)*128; const size_t sizeBytes = (*pitch)*height; - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); //err = hipMalloc(ptr, (*pitch)*height); - if (device) { + if (ctx) { const unsigned am_flags = 0; - *ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags); + *ptr = hc::am_alloc(sizeBytes, ctx->_acc, am_flags); if (sizeBytes && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; } else { - hc::am_memtracker_update(*ptr, device->_device_index, 0); + hc::am_memtracker_update(*ptr, ctx->_device_index, 0); { - LockedAccessor_DeviceCrit_t crit(device->criticalData()); + LockedAccessor_DeviceCrit_t crit(ctx->criticalData()); if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); if (hsa_status != HSA_STATUS_SUCCESS) { @@ -255,7 +255,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, hipError_t hip_status = hipSuccess; - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); *array = (hipArray*)malloc(sizeof(hipArray)); array[0]->width = width; @@ -265,22 +265,22 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, void ** ptr = &array[0]->data; - if (device) { + if (ctx) { const unsigned am_flags = 0; const size_t size = width*height; switch(desc->f) { case hipChannelFormatKindSigned: - *ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags); + *ptr = hc::am_alloc(size*sizeof(int), ctx->_acc, am_flags); break; case hipChannelFormatKindUnsigned: - *ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags); + *ptr = hc::am_alloc(size*sizeof(unsigned int), ctx->_acc, am_flags); break; case hipChannelFormatKindFloat: - *ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags); + *ptr = hc::am_alloc(size*sizeof(float), ctx->_acc, am_flags); break; case hipChannelFormatKindNone: - *ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags); + *ptr = hc::am_alloc(size*sizeof(size_t), ctx->_acc, am_flags); break; default: hip_status = hipErrorUnknown; @@ -289,9 +289,9 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, if (size && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; } else { - hc::am_memtracker_update(*ptr, device->_device_index, 0); + hc::am_memtracker_update(*ptr, ctx->_device_index, 0); { - LockedAccessor_DeviceCrit_t crit(device->criticalData()); + LockedAccessor_DeviceCrit_t crit(ctx->criticalData()); if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved: hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); if (hsa_status != HSA_STATUS_SUCCESS) { @@ -342,7 +342,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) hipError_t hip_status = hipSuccess; - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); if(hostPtr == NULL){ return ihipLogStatus(hipErrorInvalidValue); } @@ -354,17 +354,17 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) if(am_status == AM_SUCCESS){ hip_status = hipErrorHostMemoryAlreadyRegistered; }else{ - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); if(hostPtr == NULL){ return ihipLogStatus(hipErrorInvalidValue); } - if(device){ + if(ctx){ if(flags == hipHostRegisterDefault || flags == hipHostRegisterPortable || flags == hipHostRegisterMapped){ std::vectorvecAcc; for(int i=0;i_acc); } - am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); + am_status = hc::am_memory_host_lock(ctx->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size()); if(am_status == AM_SUCCESS){ hip_status = hipSuccess; }else{ @@ -382,12 +382,12 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) hipError_t hipHostUnregister(void *hostPtr) { HIP_INIT_API(hostPtr); - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); hipError_t hip_status = hipSuccess; if(hostPtr == NULL){ hip_status = hipErrorInvalidValue; }else{ - am_status_t am_status = hc::am_memory_host_unlock(device->_acc, hostPtr); + am_status_t am_status = hc::am_memory_host_unlock(ctx->_acc, hostPtr); if(am_status != AM_SUCCESS){ hip_status = hipErrorHostMemoryNotRegistered; } @@ -406,13 +406,13 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t cou { return ihipLogStatus(hipErrorInvalidValue); } - auto device = ihipGetTlsDefaultDevice(); + auto ctx = ihipGetTlsDefaultCtx(); //hsa_signal_t depSignal; - //int depSignalCnt = device._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D); + //int depSignalCnt = ctx._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D); assert(0); // Need to properly synchronize the copy - do something with depSignal if != NULL. - device->_acc.memcpy_symbol(symbolName, (void*) src,count, offset); + ctx->_acc.memcpy_symbol(symbolName, (void*) src,count, offset); #endif return ihipLogStatus(hipSuccess); } @@ -692,19 +692,19 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total) hipError_t e = hipSuccess; - ihipDevice_t * hipDevice = ihipGetTlsDefaultDevice(); - if (hipDevice) { + ihipCtx_t * ctx = ihipGetTlsDefaultCtx(); + if (ctx) { if (total) { - *total = hipDevice->_props.totalGlobalMem; + *total = ctx->_props.totalGlobalMem; } if (free) { // TODO - replace with kernel-level for reporting free memory: size_t deviceMemSize, hostMemSize, userMemSize; - hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize); + hc::am_memtracker_sizeinfo(ctx->_acc, &deviceMemSize, &hostMemSize, &userMemSize); printf ("deviceMemSize=%zu\n", deviceMemSize); - *free = hipDevice->_props.totalGlobalMem - deviceMemSize; + *free = ctx->_props.totalGlobalMem - deviceMemSize; } } else { @@ -723,7 +723,7 @@ hipError_t hipFree(void* ptr) hipError_t hipStatus = hipErrorInvalidDevicePointer; // Synchronize to ensure all work has finished. - ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. + ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. if (ptr) { hc::accelerator acc; @@ -749,7 +749,7 @@ hipError_t hipHostFree(void* ptr) HIP_INIT_API(ptr); // Synchronize to ensure all work has finished. - ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. + ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. hipError_t hipStatus = hipErrorInvalidValue; @@ -785,7 +785,7 @@ hipError_t hipFreeArray(hipArray* array) hipError_t hipStatus = hipErrorInvalidDevicePointer; // Synchronize to ensure all work has finished. - ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. + ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish. if(array->data) { hc::accelerator acc; diff --git a/hipamd/src/hip_peer.cpp b/hipamd/src/hip_peer.cpp index cec8017b0c..b7475d1d38 100644 --- a/hipamd/src/hip_peer.cpp +++ b/hipamd/src/hip_peer.cpp @@ -66,7 +66,7 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId) hipError_t err = hipSuccess; - auto thisDevice = ihipGetTlsDefaultDevice(); + auto thisDevice = ihipGetTlsDefaultCtx(); auto peerDevice = ihipGetDevice(peerDeviceId); if ((thisDevice != NULL) && (peerDevice != NULL)) { #if USE_PEER_TO_PEER>=2 @@ -111,7 +111,7 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags) if (flags != 0) { err = hipErrorInvalidValue; } else { - auto thisDevice = ihipGetTlsDefaultDevice(); + auto thisDevice = ihipGetTlsDefaultCtx(); auto peerDevice = ihipGetDevice(peerDeviceId); if (thisDevice == peerDevice) { err = hipErrorInvalidDevice; // Can't enable peer access to self. diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index d62abc49e2..2d8efdffb9 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -30,8 +30,8 @@ THE SOFTWARE. //--- hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags) { - ihipDevice_t *device = ihipGetTlsDefaultDevice(); - hc::accelerator acc = device->_acc; + ihipCtx_t *ctx = ihipGetTlsDefaultCtx(); + hc::accelerator acc = ctx->_acc; // TODO - se try-catch loop to detect memory exception? // @@ -39,9 +39,9 @@ hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags) //Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete: //This matches CUDA stream behavior: - auto istream = new ihipStream_t(device->_device_index, acc.create_view(), flags); + auto istream = new ihipStream_t(ctx, acc.create_view(), flags); - device->locked_addStream(istream); + ctx->locked_addStream(istream); *stream = istream; tprintf(DB_SYNC, "hipStreamCreate, stream=%p\n", *stream); @@ -98,8 +98,8 @@ hipError_t hipStreamSynchronize(hipStream_t stream) hipError_t e = hipSuccess; if (stream == NULL) { - ihipDevice_t *device = ihipGetTlsDefaultDevice(); - device->locked_syncDefaultStream(true/*waitOnSelf*/); + ihipCtx_t *ctx = ihipGetTlsDefaultCtx(); + ctx->locked_syncDefaultStream(true/*waitOnSelf*/); } else { stream->locked_wait(); e = hipSuccess; @@ -122,17 +122,17 @@ hipError_t hipStreamDestroy(hipStream_t stream) //--- Drain the stream: if (stream == NULL) { - ihipDevice_t *device = ihipGetTlsDefaultDevice(); - device->locked_syncDefaultStream(true/*waitOnSelf*/); + ihipCtx_t *ctx = ihipGetTlsDefaultCtx(); + ctx->locked_syncDefaultStream(true/*waitOnSelf*/); } else { stream->locked_wait(); e = hipSuccess; } - ihipDevice_t *device = stream->getDevice(); + ihipCtx_t *ctx = stream->getDevice(); - if (device) { - device->locked_removeStream(stream); + if (ctx) { + ctx->locked_removeStream(stream); delete stream; } else { e = hipErrorInvalidResourceHandle;