Change Device->Ctx
Change ihipDevice_t -> ihipCtx_t (new) Change ihipGetTlsDefaultDevice->ihipGetTlsDefaultCtx Some other changes from device->ctx where appropriate. Change-Id: I5c4ae93b2fd42c6303aa23d748eb166b7431925d
Этот коммит содержится в:
@@ -69,7 +69,7 @@ extern int HIP_DISABLE_HW_COPY_DEP;
|
||||
extern thread_local int tls_defaultDevice;
|
||||
extern thread_local hipError_t tls_lastHipError;
|
||||
class ihipStream_t;
|
||||
class ihipDevice_t;
|
||||
class ihipCtx_t;
|
||||
|
||||
|
||||
// Color defs for debug messages:
|
||||
@@ -210,6 +210,10 @@ static const char *dbName [] =
|
||||
#define tprintf(trace_level, ...)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class ihipException : public std::exception
|
||||
{
|
||||
public:
|
||||
@@ -393,7 +397,7 @@ typedef LockedAccessor<ihipStreamCritical_t> LockedAccessor_StreamCrit_t;
|
||||
class ihipStream_t {
|
||||
public:
|
||||
typedef uint64_t SeqNum_t ;
|
||||
ihipStream_t(ihipDevice_t *ctx, hc::accelerator_view av, unsigned int flags);
|
||||
ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags);
|
||||
~ihipStream_t();
|
||||
|
||||
// kind is hipMemcpyKind
|
||||
@@ -425,7 +429,7 @@ typedef uint64_t SeqNum_t ;
|
||||
|
||||
//-- Non-racy accessors:
|
||||
// These functions access fields set at initialization time and are non-racy (so do not acquire mutex)
|
||||
ihipDevice_t * getDevice() const;
|
||||
ihipCtx_t * getDevice() const;
|
||||
|
||||
|
||||
public:
|
||||
@@ -449,7 +453,7 @@ private:
|
||||
unsigned resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem);
|
||||
void setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent);
|
||||
|
||||
ihipDevice_t *_ctx; // parent context that owns this stream.
|
||||
ihipCtx_t *_ctx; // parent context that owns this stream.
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
|
||||
};
|
||||
@@ -534,10 +538,10 @@ public:
|
||||
// "Allocate" a stream ID:
|
||||
ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; };
|
||||
|
||||
bool isPeer(const ihipDevice_t *peer); // returns Trus if peer has access to memory physically located on this device.
|
||||
bool addPeer(ihipDevice_t *peer);
|
||||
bool removePeer(ihipDevice_t *peer);
|
||||
void resetPeers(ihipDevice_t *thisDevice);
|
||||
bool isPeer(const ihipCtx_t *peer); // returns Trus if peer has access to memory physically located on this device.
|
||||
bool addPeer(ihipCtx_t *peer);
|
||||
bool removePeer(ihipCtx_t *peer);
|
||||
void resetPeers(ihipCtx_t *thisDevice);
|
||||
|
||||
|
||||
void addStream(ihipStream_t *stream);
|
||||
@@ -553,7 +557,7 @@ private:
|
||||
|
||||
// These reflect the currently Enabled set of peers for this GPU:
|
||||
// Enabled peers have permissions to access the memory physically allocated on this device.
|
||||
std::list<ihipDevice_t*> _peers; // list of enabled peer devices.
|
||||
std::list<ihipCtx_t*> _peers; // list of enabled peer devices.
|
||||
uint32_t _peerCnt; // number of enabled peers
|
||||
hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.)
|
||||
private:
|
||||
@@ -570,15 +574,15 @@ typedef LockedAccessor<ihipDeviceCritical_t> LockedAccessor_DeviceCrit_t;
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// Functions which read or write the critical data are named locked_.
|
||||
// ihipDevice_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
|
||||
// ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
|
||||
// External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in
|
||||
// performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all.
|
||||
class ihipDevice_t
|
||||
class ihipCtx_t
|
||||
{
|
||||
public: // Functions:
|
||||
ihipDevice_t() {}; // note: calls constructor for _criticalData
|
||||
ihipCtx_t() {}; // note: calls constructor for _criticalData
|
||||
void init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags);
|
||||
~ihipDevice_t();
|
||||
~ihipCtx_t();
|
||||
|
||||
void locked_addStream(ihipStream_t *s);
|
||||
void locked_removeStream(ihipStream_t *s);
|
||||
@@ -589,7 +593,7 @@ public: // Functions:
|
||||
ihipDeviceCritical_t &criticalData() { return _criticalData; }; // TODO, move private. Fix P2P.
|
||||
|
||||
public: // Data, set at initialization:
|
||||
unsigned _device_index; // index into g_devices.
|
||||
unsigned _device_index; // device ID
|
||||
|
||||
hipDeviceProp_t _props; // saved device properties.
|
||||
hc::accelerator _acc;
|
||||
@@ -619,19 +623,19 @@ private: // Critical data, protected with locked access:
|
||||
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
// Global variable definition:
|
||||
extern std::once_flag hip_initialized;
|
||||
extern ihipDevice_t *g_devices; // Array of all non-emulated (ie GPU) accelerators in the system.
|
||||
extern bool g_visible_device; // Set the flag when HIP_VISIBLE_DEVICES is set
|
||||
extern unsigned g_deviceCnt;
|
||||
extern std::vector<int> g_hip_visible_devices; /* vector of integers that contains the visible device IDs */
|
||||
extern hsa_agent_t g_cpu_agent ; // the CPU agent.
|
||||
|
||||
//=================================================================================================
|
||||
void ihipInit();
|
||||
const char *ihipErrorString(hipError_t);
|
||||
ihipDevice_t *ihipGetTlsDefaultDevice();
|
||||
ihipDevice_t *ihipGetDevice(int);
|
||||
void ihipSetTs(hipEvent_t e);
|
||||
// Extern functions:
|
||||
extern void ihipInit();
|
||||
extern const char *ihipErrorString(hipError_t);
|
||||
extern ihipCtx_t *ihipGetTlsDefaultCtx();
|
||||
extern ihipCtx_t *ihipGetDevice(int);
|
||||
extern void ihipSetTs(hipEvent_t e);
|
||||
|
||||
template<typename T>
|
||||
hc::completion_future ihipMemcpyKernel(hipStream_t, T*, const T*, size_t);
|
||||
|
||||
@@ -150,7 +150,7 @@ hipError_t hipDeviceSynchronize(void)
|
||||
{
|
||||
HIP_INIT_API();
|
||||
|
||||
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
@@ -164,16 +164,16 @@ hipError_t hipDeviceReset(void)
|
||||
{
|
||||
HIP_INIT_API();
|
||||
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
auto *ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
// TODO-HCC
|
||||
// This function currently does a user-level cleanup of known resources.
|
||||
// It could benefit from KFD support to perform a more "nuclear" clean that would include any associated kernel resources and page table entries.
|
||||
|
||||
|
||||
if (device) {
|
||||
// Release device resources (streams and memory):
|
||||
device->locked_reset();
|
||||
if (ctx) {
|
||||
// Release ctx resources (streams and memory):
|
||||
ctx->locked_reset();
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
@@ -188,7 +188,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
ihipDevice_t * hipDevice = ihipGetDevice(device);
|
||||
auto * hipDevice = ihipGetDevice(device);
|
||||
hipDeviceProp_t *prop = &hipDevice->_props;
|
||||
if (hipDevice) {
|
||||
switch (attr) {
|
||||
@@ -264,7 +264,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
|
||||
|
||||
hipError_t e;
|
||||
|
||||
ihipDevice_t * hipDevice = ihipGetDevice(device);
|
||||
auto * hipDevice = ihipGetDevice(device);
|
||||
if (hipDevice) {
|
||||
// copy saved props
|
||||
*props = hipDevice->_props;
|
||||
@@ -283,7 +283,7 @@ hipError_t hipSetDeviceFlags( unsigned int flags)
|
||||
|
||||
hipError_t e;
|
||||
|
||||
ihipDevice_t * hipDevice = ihipGetDevice(tls_defaultDevice);
|
||||
auto * hipDevice = ihipGetTlsDefaultCtx();
|
||||
if(hipDevice){
|
||||
hipDevice->_device_flags = hipDevice->_device_flags | flags;
|
||||
e = hipSuccess;
|
||||
|
||||
@@ -79,8 +79,8 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
|
||||
// If stream == NULL, wait on all queues.
|
||||
// TODO-HCC fix this - is this conservative or still uses device timestamps?
|
||||
// TODO-HCC can we use barrier or event marker to implement better solution?
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
device->locked_syncDefaultStream(true);
|
||||
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
|
||||
ctx->locked_syncDefaultStream(true);
|
||||
|
||||
eh->_timestamp = hc::get_system_ticks();
|
||||
eh->_state = hipEventStatusRecorded;
|
||||
@@ -130,8 +130,8 @@ hipError_t hipEventSynchronize(hipEvent_t event)
|
||||
// Created but not actually recorded on any device:
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else if (eh->_stream == NULL) {
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
device->locked_syncDefaultStream(true);
|
||||
auto *ctx = ihipGetTlsDefaultCtx();
|
||||
ctx->locked_syncDefaultStream(true);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
eh->_marker.wait((eh->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
|
||||
@@ -84,10 +84,10 @@ thread_local hipError_t tls_lastHipError = hipSuccess;
|
||||
//=================================================================================================
|
||||
//Forward Declarations:
|
||||
//=================================================================================================
|
||||
bool ihipIsValidDevice(unsigned deviceIndex);
|
||||
|
||||
std::once_flag hip_initialized;
|
||||
ihipDevice_t *g_devices;
|
||||
|
||||
// Array of primary contexts for each device:
|
||||
ihipCtx_t *g_primaryCtxArray; ;
|
||||
bool g_visible_device = false;
|
||||
unsigned g_deviceCnt;
|
||||
std::vector<int> g_hip_visible_devices;
|
||||
@@ -98,6 +98,46 @@ hsa_agent_t g_cpu_agent;
|
||||
//=================================================================================================
|
||||
// Implementation:
|
||||
//=================================================================================================
|
||||
// static global functions:
|
||||
|
||||
static inline bool ihipIsValidDevice(unsigned deviceIndex)
|
||||
{
|
||||
// deviceIndex is unsigned so always > 0
|
||||
return (deviceIndex < g_deviceCnt);
|
||||
}
|
||||
|
||||
//---
|
||||
ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex)
|
||||
{
|
||||
if (ihipIsValidDevice(deviceIndex)) {
|
||||
return &g_primaryCtxArray[deviceIndex];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// FIXME- index the new g_deviceArray data structure
|
||||
ihipCtx_t * ihipGetDevice(int deviceIndex)
|
||||
{
|
||||
if (ihipIsValidDevice(deviceIndex)) {
|
||||
return &g_primaryCtxArray[deviceIndex];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
ihipCtx_t *ihipGetTlsDefaultCtx()
|
||||
{
|
||||
// If this is invalid, the TLS state is corrupt.
|
||||
// This can fire if called before devices are initialized.
|
||||
// TODO - consider replacing assert with error code
|
||||
assert (ihipIsValidDevice(tls_defaultDevice));
|
||||
|
||||
return &g_primaryCtxArray[tls_defaultDevice];
|
||||
}
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
@@ -128,7 +168,7 @@ ihipSignal_t::~ihipSignal_t()
|
||||
// ihipStream_t:
|
||||
//=================================================================================================
|
||||
//---
|
||||
ihipStream_t::ihipStream_t(ihipDevice_t *ctx, hc::accelerator_view av, unsigned int flags) :
|
||||
ihipStream_t::ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags) :
|
||||
_id(0), // will be set by add function.
|
||||
_av(av),
|
||||
_flags(flags),
|
||||
@@ -216,14 +256,14 @@ template<>
|
||||
void ihipDeviceCriticalBase_t<DeviceMutex>::recomputePeerAgents()
|
||||
{
|
||||
_peerCnt = 0;
|
||||
std::for_each (_peers.begin(), _peers.end(), [this](ihipDevice_t* device) {
|
||||
_peerAgents[_peerCnt++] = device->_hsa_agent;
|
||||
std::for_each (_peers.begin(), _peers.end(), [this](ihipCtx_t* ctx) {
|
||||
_peerAgents[_peerCnt++] = ctx->_hsa_agent;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
bool ihipDeviceCriticalBase_t<DeviceMutex>::isPeer(const ihipDevice_t *peer)
|
||||
bool ihipDeviceCriticalBase_t<DeviceMutex>::isPeer(const ihipCtx_t *peer)
|
||||
{
|
||||
auto match = std::find(_peers.begin(), _peers.end(), peer);
|
||||
return (match != std::end(_peers));
|
||||
@@ -231,7 +271,7 @@ bool ihipDeviceCriticalBase_t<DeviceMutex>::isPeer(const ihipDevice_t *peer)
|
||||
|
||||
|
||||
template<>
|
||||
bool ihipDeviceCriticalBase_t<DeviceMutex>::addPeer(ihipDevice_t *peer)
|
||||
bool ihipDeviceCriticalBase_t<DeviceMutex>::addPeer(ihipCtx_t *peer)
|
||||
{
|
||||
auto match = std::find(_peers.begin(), _peers.end(), peer);
|
||||
if (match == std::end(_peers)) {
|
||||
@@ -247,7 +287,7 @@ bool ihipDeviceCriticalBase_t<DeviceMutex>::addPeer(ihipDevice_t *peer)
|
||||
|
||||
|
||||
template<>
|
||||
bool ihipDeviceCriticalBase_t<DeviceMutex>::removePeer(ihipDevice_t *peer)
|
||||
bool ihipDeviceCriticalBase_t<DeviceMutex>::removePeer(ihipCtx_t *peer)
|
||||
{
|
||||
auto match = std::find(_peers.begin(), _peers.end(), peer);
|
||||
if (match != std::end(_peers)) {
|
||||
@@ -262,7 +302,7 @@ bool ihipDeviceCriticalBase_t<DeviceMutex>::removePeer(ihipDevice_t *peer)
|
||||
|
||||
|
||||
template<>
|
||||
void ihipDeviceCriticalBase_t<DeviceMutex>::resetPeers(ihipDevice_t *thisDevice)
|
||||
void ihipDeviceCriticalBase_t<DeviceMutex>::resetPeers(ihipCtx_t *thisDevice)
|
||||
{
|
||||
_peers.clear();
|
||||
_peerCnt = 0;
|
||||
@@ -279,20 +319,10 @@ void ihipDeviceCriticalBase_t<DeviceMutex>::addStream(ihipStream_t *stream)
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
|
||||
//---
|
||||
//Flavor that takes device index.
|
||||
ihipDevice_t * getDevice(unsigned deviceIndex)
|
||||
{
|
||||
if (ihipIsValidDevice(deviceIndex)) {
|
||||
return &g_devices[deviceIndex];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
ihipDevice_t * ihipStream_t::getDevice() const
|
||||
ihipCtx_t * ihipStream_t::getDevice() const
|
||||
{
|
||||
return _ctx;
|
||||
};
|
||||
@@ -496,7 +526,7 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t
|
||||
//
|
||||
//Reset the device - this is called from hipDeviceReset.
|
||||
//Device may be reset multiple times, and may be reset after init.
|
||||
void ihipDevice_t::locked_reset()
|
||||
void ihipCtx_t::locked_reset()
|
||||
{
|
||||
// Obtain mutex access to the device critical data, release by destructor
|
||||
LockedAccessor_DeviceCrit_t crit(_criticalData);
|
||||
@@ -535,7 +565,7 @@ void ihipDevice_t::locked_reset()
|
||||
|
||||
|
||||
//---
|
||||
void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags)
|
||||
void ihipCtx_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags)
|
||||
{
|
||||
_device_index = device_index;
|
||||
_device_flags = flags;
|
||||
@@ -570,7 +600,7 @@ void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerat
|
||||
|
||||
|
||||
|
||||
ihipDevice_t::~ihipDevice_t()
|
||||
ihipCtx_t::~ihipCtx_t()
|
||||
{
|
||||
if (_default_stream) {
|
||||
delete _default_stream;
|
||||
@@ -704,7 +734,7 @@ static hsa_status_t countGpuAgents(hsa_agent_t agent, void *data) {
|
||||
}
|
||||
|
||||
// Internal version,
|
||||
hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
hipError_t ihipCtx_t::getProperties(hipDeviceProp_t* prop)
|
||||
{
|
||||
hipError_t e = hipSuccess;
|
||||
hsa_status_t err;
|
||||
@@ -881,7 +911,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
// Implement "default" stream syncronization
|
||||
// This waits for all other streams to drain before continuing.
|
||||
// If waitOnSelf is set, this additionally waits for the default stream to empty.
|
||||
void ihipDevice_t::locked_syncDefaultStream(bool waitOnSelf)
|
||||
void ihipCtx_t::locked_syncDefaultStream(bool waitOnSelf)
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(_criticalData);
|
||||
|
||||
@@ -904,7 +934,7 @@ void ihipDevice_t::locked_syncDefaultStream(bool waitOnSelf)
|
||||
}
|
||||
|
||||
//---
|
||||
void ihipDevice_t::locked_addStream(ihipStream_t *s)
|
||||
void ihipCtx_t::locked_addStream(ihipStream_t *s)
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(_criticalData);
|
||||
|
||||
@@ -912,7 +942,7 @@ void ihipDevice_t::locked_addStream(ihipStream_t *s)
|
||||
}
|
||||
|
||||
//---
|
||||
void ihipDevice_t::locked_removeStream(ihipStream_t *s)
|
||||
void ihipCtx_t::locked_removeStream(ihipStream_t *s)
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(_criticalData);
|
||||
|
||||
@@ -922,7 +952,7 @@ void ihipDevice_t::locked_removeStream(ihipStream_t *s)
|
||||
|
||||
//---
|
||||
//Heavyweight synchronization that waits on all streams, ignoring hipStreamNonBlocking flag.
|
||||
void ihipDevice_t::locked_waitAllStreams()
|
||||
void ihipCtx_t::locked_waitAllStreams()
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(_criticalData);
|
||||
|
||||
@@ -1115,7 +1145,7 @@ void ihipInit()
|
||||
throw ihipException(hipErrorRuntimeOther);
|
||||
}
|
||||
|
||||
g_devices = new ihipDevice_t[deviceCnt];
|
||||
g_primaryCtxArray = new ihipCtx_t[deviceCnt];
|
||||
g_deviceCnt = 0;
|
||||
for (int i=0; i<accs.size(); i++) {
|
||||
// check if the device id is included in the HIP_VISIBLE_DEVICES env variable
|
||||
@@ -1125,7 +1155,7 @@ void ihipInit()
|
||||
//If device is not in visible devices list, ignore
|
||||
continue;
|
||||
}
|
||||
g_devices[g_deviceCnt].init(g_deviceCnt, deviceCnt, accs[i], hipDeviceMapHost);
|
||||
g_primaryCtxArray[g_deviceCnt].init(g_deviceCnt, deviceCnt, accs[i], hipDeviceMapHost);
|
||||
g_deviceCnt++;
|
||||
}
|
||||
}
|
||||
@@ -1139,34 +1169,8 @@ void ihipInit()
|
||||
}
|
||||
|
||||
|
||||
bool ihipIsValidDevice(unsigned deviceIndex)
|
||||
{
|
||||
// deviceIndex is unsigned so always > 0
|
||||
return (deviceIndex < g_deviceCnt);
|
||||
}
|
||||
|
||||
//---
|
||||
ihipDevice_t *ihipGetTlsDefaultDevice()
|
||||
{
|
||||
// If this is invalid, the TLS state is corrupt.
|
||||
// This can fire if called before devices are initialized.
|
||||
// TODO - consider replacing assert with error code
|
||||
assert (ihipIsValidDevice(tls_defaultDevice));
|
||||
|
||||
return &g_devices[tls_defaultDevice];
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
ihipDevice_t *ihipGetDevice(int deviceId)
|
||||
{
|
||||
if ((deviceId >= 0) && (deviceId < g_deviceCnt)) {
|
||||
return &g_devices[deviceId];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//---
|
||||
// Get the stream to use for a command submission.
|
||||
@@ -1176,7 +1180,7 @@ ihipDevice_t *ihipGetDevice(int deviceId)
|
||||
hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
|
||||
{
|
||||
if (stream == hipStreamNull ) {
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
ihipCtx_t *device = ihipGetTlsDefaultCtx();
|
||||
|
||||
#ifndef HIP_API_PER_THREAD_DEFAULT_STREAM
|
||||
device->locked_syncDefaultStream(false);
|
||||
@@ -1424,7 +1428,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked,
|
||||
void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent)
|
||||
{
|
||||
// current* represents the device associated with the specified stream.
|
||||
ihipDevice_t *streamDevice = this->getDevice();
|
||||
ihipCtx_t *streamDevice = this->getDevice();
|
||||
hsa_agent_t streamAgent = streamDevice->_hsa_agent;
|
||||
|
||||
// ROCR runtime logic is :
|
||||
@@ -1444,7 +1448,7 @@ void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType,
|
||||
|
||||
void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind)
|
||||
{
|
||||
ihipDevice_t *device = this->getDevice();
|
||||
ihipCtx_t *device = this->getDevice();
|
||||
if (device == NULL) {
|
||||
throw ihipException(hipErrorInvalidDevice);
|
||||
}
|
||||
@@ -1470,7 +1474,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
#if USE_PEER_TO_PEER>=2
|
||||
// TODO - consider refactor. Do we need to support simul access of enable/disable peers with access?
|
||||
LockedAccessor_DeviceCrit_t dcrit(device->criticalData());
|
||||
if (dcrit->isPeer(::getDevice(dstPtrInfo._appId)) && (dcrit->isPeer(::getDevice(srcPtrInfo._appId)))) {
|
||||
if (dcrit->isPeer(ihipGetDevice(dstPtrInfo._appId)) && (dcrit->isPeer(ihipGetDevice(srcPtrInfo._appId)))) {
|
||||
copyEngineCanSeeSrcAndDest = true;
|
||||
}
|
||||
#endif
|
||||
@@ -1654,7 +1658,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
ihipDevice_t *device = this->getDevice();
|
||||
ihipCtx_t *device = this->getDevice();
|
||||
|
||||
if (device == NULL) {
|
||||
throw ihipException(hipErrorInvalidDevice);
|
||||
@@ -1740,7 +1744,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc)
|
||||
{
|
||||
HIP_INIT_API(deviceId, acc);
|
||||
|
||||
ihipDevice_t *d = ihipGetDevice(deviceId);
|
||||
ihipCtx_t *d = ihipGetDevice(deviceId);
|
||||
hipError_t err;
|
||||
if (d == NULL) {
|
||||
err = hipErrorInvalidDevice;
|
||||
@@ -1761,7 +1765,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
|
||||
HIP_INIT_API(stream, av);
|
||||
|
||||
if (stream == hipStreamNull ) {
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
ihipCtx_t *device = ihipGetTlsDefaultCtx();
|
||||
stream = device->_default_stream;
|
||||
}
|
||||
|
||||
@@ -1775,7 +1779,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
|
||||
// TODO - describe naming convention. ihip _. No accessors. No early returns from functions. Set status to success at top, only set error codes in implementation. No tabs.
|
||||
// Caps convention _ or camelCase
|
||||
// if { }
|
||||
// Should use ihip* data structures inside code rather than app-facing hip. For example, use ihipDevice_t (rather than hipDevice_t), ihipStream_t (rather than hipStream_t).
|
||||
// Should use ihip* data structures inside code rather than app-facing hip. For example, use ihipCtx_t (rather than hipDevice_t), ihipStream_t (rather than hipStream_t).
|
||||
// locked_
|
||||
// TODO - describe MT strategy
|
||||
//
|
||||
|
||||
@@ -120,18 +120,18 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (device) {
|
||||
if (ctx) {
|
||||
const unsigned am_flags = 0;
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
|
||||
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, am_flags);
|
||||
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, 0);
|
||||
hc::am_memtracker_update(*ptr, ctx->_device_index, 0);
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(device->criticalData());
|
||||
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
|
||||
if (crit->peerCnt()) {
|
||||
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
}
|
||||
@@ -152,25 +152,25 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if(device){
|
||||
if(ctx){
|
||||
if(flags == hipHostMallocDefault){
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
|
||||
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, amHostPinned);
|
||||
if(sizeBytes < 1 && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}else{
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, amHostPinned);
|
||||
hc::am_memtracker_update(*ptr, ctx->_device_index, amHostPinned);
|
||||
}
|
||||
tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
|
||||
} else if(flags & hipHostMallocMapped){
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
|
||||
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, amHostPinned);
|
||||
if(sizeBytes && (*ptr == NULL)){
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}else{
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, flags);
|
||||
hc::am_memtracker_update(*ptr, ctx->_device_index, flags);
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(device->criticalData());
|
||||
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
|
||||
if (crit->peerCnt()) {
|
||||
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
}
|
||||
@@ -212,19 +212,19 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
|
||||
*pitch = ((((int)width-1)/128) + 1)*128;
|
||||
const size_t sizeBytes = (*pitch)*height;
|
||||
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
//err = hipMalloc(ptr, (*pitch)*height);
|
||||
if (device) {
|
||||
if (ctx) {
|
||||
const unsigned am_flags = 0;
|
||||
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
|
||||
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, am_flags);
|
||||
|
||||
if (sizeBytes && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, 0);
|
||||
hc::am_memtracker_update(*ptr, ctx->_device_index, 0);
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(device->criticalData());
|
||||
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
|
||||
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
|
||||
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
@@ -255,7 +255,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
*array = (hipArray*)malloc(sizeof(hipArray));
|
||||
array[0]->width = width;
|
||||
@@ -265,22 +265,22 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
|
||||
void ** ptr = &array[0]->data;
|
||||
|
||||
if (device) {
|
||||
if (ctx) {
|
||||
const unsigned am_flags = 0;
|
||||
const size_t size = width*height;
|
||||
|
||||
switch(desc->f) {
|
||||
case hipChannelFormatKindSigned:
|
||||
*ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags);
|
||||
*ptr = hc::am_alloc(size*sizeof(int), ctx->_acc, am_flags);
|
||||
break;
|
||||
case hipChannelFormatKindUnsigned:
|
||||
*ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags);
|
||||
*ptr = hc::am_alloc(size*sizeof(unsigned int), ctx->_acc, am_flags);
|
||||
break;
|
||||
case hipChannelFormatKindFloat:
|
||||
*ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags);
|
||||
*ptr = hc::am_alloc(size*sizeof(float), ctx->_acc, am_flags);
|
||||
break;
|
||||
case hipChannelFormatKindNone:
|
||||
*ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags);
|
||||
*ptr = hc::am_alloc(size*sizeof(size_t), ctx->_acc, am_flags);
|
||||
break;
|
||||
default:
|
||||
hip_status = hipErrorUnknown;
|
||||
@@ -289,9 +289,9 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
|
||||
if (size && (*ptr == NULL)) {
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
} else {
|
||||
hc::am_memtracker_update(*ptr, device->_device_index, 0);
|
||||
hc::am_memtracker_update(*ptr, ctx->_device_index, 0);
|
||||
{
|
||||
LockedAccessor_DeviceCrit_t crit(device->criticalData());
|
||||
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
|
||||
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
|
||||
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
|
||||
if (hsa_status != HSA_STATUS_SUCCESS) {
|
||||
@@ -342,7 +342,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if(hostPtr == NULL){
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -354,17 +354,17 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
if(am_status == AM_SUCCESS){
|
||||
hip_status = hipErrorHostMemoryAlreadyRegistered;
|
||||
}else{
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if(hostPtr == NULL){
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
if(device){
|
||||
if(ctx){
|
||||
if(flags == hipHostRegisterDefault || flags == hipHostRegisterPortable || flags == hipHostRegisterMapped){
|
||||
std::vector<hc::accelerator>vecAcc;
|
||||
for(int i=0;i<g_deviceCnt;i++){
|
||||
vecAcc.push_back(g_devices[i]._acc);
|
||||
vecAcc.push_back(ihipGetDevice(i)->_acc);
|
||||
}
|
||||
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
|
||||
am_status = hc::am_memory_host_lock(ctx->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
|
||||
if(am_status == AM_SUCCESS){
|
||||
hip_status = hipSuccess;
|
||||
}else{
|
||||
@@ -382,12 +382,12 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
hipError_t hipHostUnregister(void *hostPtr)
|
||||
{
|
||||
HIP_INIT_API(hostPtr);
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
hipError_t hip_status = hipSuccess;
|
||||
if(hostPtr == NULL){
|
||||
hip_status = hipErrorInvalidValue;
|
||||
}else{
|
||||
am_status_t am_status = hc::am_memory_host_unlock(device->_acc, hostPtr);
|
||||
am_status_t am_status = hc::am_memory_host_unlock(ctx->_acc, hostPtr);
|
||||
if(am_status != AM_SUCCESS){
|
||||
hip_status = hipErrorHostMemoryNotRegistered;
|
||||
}
|
||||
@@ -406,13 +406,13 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t cou
|
||||
{
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
auto device = ihipGetTlsDefaultDevice();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
//hsa_signal_t depSignal;
|
||||
//int depSignalCnt = device._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D);
|
||||
//int depSignalCnt = ctx._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D);
|
||||
assert(0); // Need to properly synchronize the copy - do something with depSignal if != NULL.
|
||||
|
||||
device->_acc.memcpy_symbol(symbolName, (void*) src,count, offset);
|
||||
ctx->_acc.memcpy_symbol(symbolName, (void*) src,count, offset);
|
||||
#endif
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
@@ -692,19 +692,19 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
ihipDevice_t * hipDevice = ihipGetTlsDefaultDevice();
|
||||
if (hipDevice) {
|
||||
ihipCtx_t * ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
if (total) {
|
||||
*total = hipDevice->_props.totalGlobalMem;
|
||||
*total = ctx->_props.totalGlobalMem;
|
||||
}
|
||||
|
||||
if (free) {
|
||||
// TODO - replace with kernel-level for reporting free memory:
|
||||
size_t deviceMemSize, hostMemSize, userMemSize;
|
||||
hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
|
||||
hc::am_memtracker_sizeinfo(ctx->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
|
||||
printf ("deviceMemSize=%zu\n", deviceMemSize);
|
||||
|
||||
*free = hipDevice->_props.totalGlobalMem - deviceMemSize;
|
||||
*free = ctx->_props.totalGlobalMem - deviceMemSize;
|
||||
}
|
||||
|
||||
} else {
|
||||
@@ -723,7 +723,7 @@ hipError_t hipFree(void* ptr)
|
||||
hipError_t hipStatus = hipErrorInvalidDevicePointer;
|
||||
|
||||
// Synchronize to ensure all work has finished.
|
||||
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
|
||||
if (ptr) {
|
||||
hc::accelerator acc;
|
||||
@@ -749,7 +749,7 @@ hipError_t hipHostFree(void* ptr)
|
||||
HIP_INIT_API(ptr);
|
||||
|
||||
// Synchronize to ensure all work has finished.
|
||||
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
|
||||
|
||||
hipError_t hipStatus = hipErrorInvalidValue;
|
||||
@@ -785,7 +785,7 @@ hipError_t hipFreeArray(hipArray* array)
|
||||
hipError_t hipStatus = hipErrorInvalidDevicePointer;
|
||||
|
||||
// Synchronize to ensure all work has finished.
|
||||
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
|
||||
|
||||
if(array->data) {
|
||||
hc::accelerator acc;
|
||||
|
||||
@@ -66,7 +66,7 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId)
|
||||
|
||||
hipError_t err = hipSuccess;
|
||||
|
||||
auto thisDevice = ihipGetTlsDefaultDevice();
|
||||
auto thisDevice = ihipGetTlsDefaultCtx();
|
||||
auto peerDevice = ihipGetDevice(peerDeviceId);
|
||||
if ((thisDevice != NULL) && (peerDevice != NULL)) {
|
||||
#if USE_PEER_TO_PEER>=2
|
||||
@@ -111,7 +111,7 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags)
|
||||
if (flags != 0) {
|
||||
err = hipErrorInvalidValue;
|
||||
} else {
|
||||
auto thisDevice = ihipGetTlsDefaultDevice();
|
||||
auto thisDevice = ihipGetTlsDefaultCtx();
|
||||
auto peerDevice = ihipGetDevice(peerDeviceId);
|
||||
if (thisDevice == peerDevice) {
|
||||
err = hipErrorInvalidDevice; // Can't enable peer access to self.
|
||||
|
||||
@@ -30,8 +30,8 @@ THE SOFTWARE.
|
||||
//---
|
||||
hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags)
|
||||
{
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
hc::accelerator acc = device->_acc;
|
||||
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
|
||||
hc::accelerator acc = ctx->_acc;
|
||||
|
||||
// TODO - se try-catch loop to detect memory exception?
|
||||
//
|
||||
@@ -39,9 +39,9 @@ hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags)
|
||||
//Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete:
|
||||
//This matches CUDA stream behavior:
|
||||
|
||||
auto istream = new ihipStream_t(device->_device_index, acc.create_view(), flags);
|
||||
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
|
||||
|
||||
device->locked_addStream(istream);
|
||||
ctx->locked_addStream(istream);
|
||||
|
||||
*stream = istream;
|
||||
tprintf(DB_SYNC, "hipStreamCreate, stream=%p\n", *stream);
|
||||
@@ -98,8 +98,8 @@ hipError_t hipStreamSynchronize(hipStream_t stream)
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if (stream == NULL) {
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
device->locked_syncDefaultStream(true/*waitOnSelf*/);
|
||||
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
|
||||
ctx->locked_syncDefaultStream(true/*waitOnSelf*/);
|
||||
} else {
|
||||
stream->locked_wait();
|
||||
e = hipSuccess;
|
||||
@@ -122,17 +122,17 @@ hipError_t hipStreamDestroy(hipStream_t stream)
|
||||
|
||||
//--- Drain the stream:
|
||||
if (stream == NULL) {
|
||||
ihipDevice_t *device = ihipGetTlsDefaultDevice();
|
||||
device->locked_syncDefaultStream(true/*waitOnSelf*/);
|
||||
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
|
||||
ctx->locked_syncDefaultStream(true/*waitOnSelf*/);
|
||||
} else {
|
||||
stream->locked_wait();
|
||||
e = hipSuccess;
|
||||
}
|
||||
|
||||
ihipDevice_t *device = stream->getDevice();
|
||||
ihipCtx_t *ctx = stream->getDevice();
|
||||
|
||||
if (device) {
|
||||
device->locked_removeStream(stream);
|
||||
if (ctx) {
|
||||
ctx->locked_removeStream(stream);
|
||||
delete stream;
|
||||
} else {
|
||||
e = hipErrorInvalidResourceHandle;
|
||||
|
||||
Ссылка в новой задаче
Block a user