Change ihipDevice_t -> ihipCtx_t (new)
Change ihipGetTlsDefaultDevice->ihipGetTlsDefaultCtx
Some other changes from device->ctx where appropriate.

Change-Id: I5c4ae93b2fd42c6303aa23d748eb166b7431925d
Этот коммит содержится в:
Ben Sander
2016-08-07 21:46:51 -05:00
родитель 3c604b6430
Коммит 0d16565061
7 изменённых файлов: 163 добавлений и 155 удалений
+26 -22
Просмотреть файл
@@ -69,7 +69,7 @@ extern int HIP_DISABLE_HW_COPY_DEP;
extern thread_local int tls_defaultDevice;
extern thread_local hipError_t tls_lastHipError;
class ihipStream_t;
class ihipDevice_t;
class ihipCtx_t;
// Color defs for debug messages:
@@ -210,6 +210,10 @@ static const char *dbName [] =
#define tprintf(trace_level, ...)
#endif
class ihipException : public std::exception
{
public:
@@ -393,7 +397,7 @@ typedef LockedAccessor<ihipStreamCritical_t> LockedAccessor_StreamCrit_t;
class ihipStream_t {
public:
typedef uint64_t SeqNum_t ;
ihipStream_t(ihipDevice_t *ctx, hc::accelerator_view av, unsigned int flags);
ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags);
~ihipStream_t();
// kind is hipMemcpyKind
@@ -425,7 +429,7 @@ typedef uint64_t SeqNum_t ;
//-- Non-racy accessors:
// These functions access fields set at initialization time and are non-racy (so do not acquire mutex)
ihipDevice_t * getDevice() const;
ihipCtx_t * getDevice() const;
public:
@@ -449,7 +453,7 @@ private:
unsigned resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem);
void setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent);
ihipDevice_t *_ctx; // parent context that owns this stream.
ihipCtx_t *_ctx; // parent context that owns this stream.
friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
};
@@ -534,10 +538,10 @@ public:
// "Allocate" a stream ID:
ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; };
bool isPeer(const ihipDevice_t *peer); // returns Trus if peer has access to memory physically located on this device.
bool addPeer(ihipDevice_t *peer);
bool removePeer(ihipDevice_t *peer);
void resetPeers(ihipDevice_t *thisDevice);
bool isPeer(const ihipCtx_t *peer); // returns Trus if peer has access to memory physically located on this device.
bool addPeer(ihipCtx_t *peer);
bool removePeer(ihipCtx_t *peer);
void resetPeers(ihipCtx_t *thisDevice);
void addStream(ihipStream_t *stream);
@@ -553,7 +557,7 @@ private:
// These reflect the currently Enabled set of peers for this GPU:
// Enabled peers have permissions to access the memory physically allocated on this device.
std::list<ihipDevice_t*> _peers; // list of enabled peer devices.
std::list<ihipCtx_t*> _peers; // list of enabled peer devices.
uint32_t _peerCnt; // number of enabled peers
hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.)
private:
@@ -570,15 +574,15 @@ typedef LockedAccessor<ihipDeviceCritical_t> LockedAccessor_DeviceCrit_t;
//-------------------------------------------------------------------------------------------------
// Functions which read or write the critical data are named locked_.
// ihipDevice_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
// ihipCtx_t does not use recursive locks so the ihip implementation must avoid calling a locked_ function from within a locked_ function.
// External functions which call several locked_ functions will acquire and release the lock for each function. if this occurs in
// performance-sensitive code we may want to refactor by adding non-locked functions and creating a new locked_ member function to call them all.
class ihipDevice_t
class ihipCtx_t
{
public: // Functions:
ihipDevice_t() {}; // note: calls constructor for _criticalData
ihipCtx_t() {}; // note: calls constructor for _criticalData
void init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags);
~ihipDevice_t();
~ihipCtx_t();
void locked_addStream(ihipStream_t *s);
void locked_removeStream(ihipStream_t *s);
@@ -589,7 +593,7 @@ public: // Functions:
ihipDeviceCritical_t &criticalData() { return _criticalData; }; // TODO, move private. Fix P2P.
public: // Data, set at initialization:
unsigned _device_index; // index into g_devices.
unsigned _device_index; // device ID
hipDeviceProp_t _props; // saved device properties.
hc::accelerator _acc;
@@ -619,19 +623,19 @@ private: // Critical data, protected with locked access:
//=================================================================================================
// Global variable definition:
extern std::once_flag hip_initialized;
extern ihipDevice_t *g_devices; // Array of all non-emulated (ie GPU) accelerators in the system.
extern bool g_visible_device; // Set the flag when HIP_VISIBLE_DEVICES is set
extern unsigned g_deviceCnt;
extern std::vector<int> g_hip_visible_devices; /* vector of integers that contains the visible device IDs */
extern hsa_agent_t g_cpu_agent ; // the CPU agent.
//=================================================================================================
void ihipInit();
const char *ihipErrorString(hipError_t);
ihipDevice_t *ihipGetTlsDefaultDevice();
ihipDevice_t *ihipGetDevice(int);
void ihipSetTs(hipEvent_t e);
// Extern functions:
extern void ihipInit();
extern const char *ihipErrorString(hipError_t);
extern ihipCtx_t *ihipGetTlsDefaultCtx();
extern ihipCtx_t *ihipGetDevice(int);
extern void ihipSetTs(hipEvent_t e);
template<typename T>
hc::completion_future ihipMemcpyKernel(hipStream_t, T*, const T*, size_t);
+8 -8
Просмотреть файл
@@ -150,7 +150,7 @@ hipError_t hipDeviceSynchronize(void)
{
HIP_INIT_API();
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
return ihipLogStatus(hipSuccess);
}
@@ -164,16 +164,16 @@ hipError_t hipDeviceReset(void)
{
HIP_INIT_API();
ihipDevice_t *device = ihipGetTlsDefaultDevice();
auto *ctx = ihipGetTlsDefaultCtx();
// TODO-HCC
// This function currently does a user-level cleanup of known resources.
// It could benefit from KFD support to perform a more "nuclear" clean that would include any associated kernel resources and page table entries.
if (device) {
// Release device resources (streams and memory):
device->locked_reset();
if (ctx) {
// Release ctx resources (streams and memory):
ctx->locked_reset();
}
return ihipLogStatus(hipSuccess);
@@ -188,7 +188,7 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
hipError_t e = hipSuccess;
ihipDevice_t * hipDevice = ihipGetDevice(device);
auto * hipDevice = ihipGetDevice(device);
hipDeviceProp_t *prop = &hipDevice->_props;
if (hipDevice) {
switch (attr) {
@@ -264,7 +264,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
hipError_t e;
ihipDevice_t * hipDevice = ihipGetDevice(device);
auto * hipDevice = ihipGetDevice(device);
if (hipDevice) {
// copy saved props
*props = hipDevice->_props;
@@ -283,7 +283,7 @@ hipError_t hipSetDeviceFlags( unsigned int flags)
hipError_t e;
ihipDevice_t * hipDevice = ihipGetDevice(tls_defaultDevice);
auto * hipDevice = ihipGetTlsDefaultCtx();
if(hipDevice){
hipDevice->_device_flags = hipDevice->_device_flags | flags;
e = hipSuccess;
+4 -4
Просмотреть файл
@@ -79,8 +79,8 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
// If stream == NULL, wait on all queues.
// TODO-HCC fix this - is this conservative or still uses device timestamps?
// TODO-HCC can we use barrier or event marker to implement better solution?
ihipDevice_t *device = ihipGetTlsDefaultDevice();
device->locked_syncDefaultStream(true);
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
ctx->locked_syncDefaultStream(true);
eh->_timestamp = hc::get_system_ticks();
eh->_state = hipEventStatusRecorded;
@@ -130,8 +130,8 @@ hipError_t hipEventSynchronize(hipEvent_t event)
// Created but not actually recorded on any device:
return ihipLogStatus(hipSuccess);
} else if (eh->_stream == NULL) {
ihipDevice_t *device = ihipGetTlsDefaultDevice();
device->locked_syncDefaultStream(true);
auto *ctx = ihipGetTlsDefaultCtx();
ctx->locked_syncDefaultStream(true);
return ihipLogStatus(hipSuccess);
} else {
eh->_marker.wait((eh->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
+69 -65
Просмотреть файл
@@ -84,10 +84,10 @@ thread_local hipError_t tls_lastHipError = hipSuccess;
//=================================================================================================
//Forward Declarations:
//=================================================================================================
bool ihipIsValidDevice(unsigned deviceIndex);
std::once_flag hip_initialized;
ihipDevice_t *g_devices;
// Array of primary contexts for each device:
ihipCtx_t *g_primaryCtxArray; ;
bool g_visible_device = false;
unsigned g_deviceCnt;
std::vector<int> g_hip_visible_devices;
@@ -98,6 +98,46 @@ hsa_agent_t g_cpu_agent;
//=================================================================================================
// Implementation:
//=================================================================================================
// static global functions:
static inline bool ihipIsValidDevice(unsigned deviceIndex)
{
// deviceIndex is unsigned so always > 0
return (deviceIndex < g_deviceCnt);
}
//---
ihipCtx_t * ihipGetPrimaryCtx(unsigned deviceIndex)
{
if (ihipIsValidDevice(deviceIndex)) {
return &g_primaryCtxArray[deviceIndex];
} else {
return NULL;
}
};
// FIXME- index the new g_deviceArray data structure
ihipCtx_t * ihipGetDevice(int deviceIndex)
{
if (ihipIsValidDevice(deviceIndex)) {
return &g_primaryCtxArray[deviceIndex];
} else {
return NULL;
}
}
//---
ihipCtx_t *ihipGetTlsDefaultCtx()
{
// If this is invalid, the TLS state is corrupt.
// This can fire if called before devices are initialized.
// TODO - consider replacing assert with error code
assert (ihipIsValidDevice(tls_defaultDevice));
return &g_primaryCtxArray[tls_defaultDevice];
}
//=================================================================================================
@@ -128,7 +168,7 @@ ihipSignal_t::~ihipSignal_t()
// ihipStream_t:
//=================================================================================================
//---
ihipStream_t::ihipStream_t(ihipDevice_t *ctx, hc::accelerator_view av, unsigned int flags) :
ihipStream_t::ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int flags) :
_id(0), // will be set by add function.
_av(av),
_flags(flags),
@@ -216,14 +256,14 @@ template<>
void ihipDeviceCriticalBase_t<DeviceMutex>::recomputePeerAgents()
{
_peerCnt = 0;
std::for_each (_peers.begin(), _peers.end(), [this](ihipDevice_t* device) {
_peerAgents[_peerCnt++] = device->_hsa_agent;
std::for_each (_peers.begin(), _peers.end(), [this](ihipCtx_t* ctx) {
_peerAgents[_peerCnt++] = ctx->_hsa_agent;
});
}
template<>
bool ihipDeviceCriticalBase_t<DeviceMutex>::isPeer(const ihipDevice_t *peer)
bool ihipDeviceCriticalBase_t<DeviceMutex>::isPeer(const ihipCtx_t *peer)
{
auto match = std::find(_peers.begin(), _peers.end(), peer);
return (match != std::end(_peers));
@@ -231,7 +271,7 @@ bool ihipDeviceCriticalBase_t<DeviceMutex>::isPeer(const ihipDevice_t *peer)
template<>
bool ihipDeviceCriticalBase_t<DeviceMutex>::addPeer(ihipDevice_t *peer)
bool ihipDeviceCriticalBase_t<DeviceMutex>::addPeer(ihipCtx_t *peer)
{
auto match = std::find(_peers.begin(), _peers.end(), peer);
if (match == std::end(_peers)) {
@@ -247,7 +287,7 @@ bool ihipDeviceCriticalBase_t<DeviceMutex>::addPeer(ihipDevice_t *peer)
template<>
bool ihipDeviceCriticalBase_t<DeviceMutex>::removePeer(ihipDevice_t *peer)
bool ihipDeviceCriticalBase_t<DeviceMutex>::removePeer(ihipCtx_t *peer)
{
auto match = std::find(_peers.begin(), _peers.end(), peer);
if (match != std::end(_peers)) {
@@ -262,7 +302,7 @@ bool ihipDeviceCriticalBase_t<DeviceMutex>::removePeer(ihipDevice_t *peer)
template<>
void ihipDeviceCriticalBase_t<DeviceMutex>::resetPeers(ihipDevice_t *thisDevice)
void ihipDeviceCriticalBase_t<DeviceMutex>::resetPeers(ihipCtx_t *thisDevice)
{
_peers.clear();
_peerCnt = 0;
@@ -279,20 +319,10 @@ void ihipDeviceCriticalBase_t<DeviceMutex>::addStream(ihipStream_t *stream)
//-------------------------------------------------------------------------------------------------
//---
//Flavor that takes device index.
ihipDevice_t * getDevice(unsigned deviceIndex)
{
if (ihipIsValidDevice(deviceIndex)) {
return &g_devices[deviceIndex];
} else {
return NULL;
}
};
//---
ihipDevice_t * ihipStream_t::getDevice() const
ihipCtx_t * ihipStream_t::getDevice() const
{
return _ctx;
};
@@ -496,7 +526,7 @@ int ihipStream_t::preCopyCommand(LockedAccessor_StreamCrit_t &crit, ihipSignal_t
//
//Reset the device - this is called from hipDeviceReset.
//Device may be reset multiple times, and may be reset after init.
void ihipDevice_t::locked_reset()
void ihipCtx_t::locked_reset()
{
// Obtain mutex access to the device critical data, release by destructor
LockedAccessor_DeviceCrit_t crit(_criticalData);
@@ -535,7 +565,7 @@ void ihipDevice_t::locked_reset()
//---
void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags)
void ihipCtx_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerator &acc, unsigned flags)
{
_device_index = device_index;
_device_flags = flags;
@@ -570,7 +600,7 @@ void ihipDevice_t::init(unsigned device_index, unsigned deviceCnt, hc::accelerat
ihipDevice_t::~ihipDevice_t()
ihipCtx_t::~ihipCtx_t()
{
if (_default_stream) {
delete _default_stream;
@@ -704,7 +734,7 @@ static hsa_status_t countGpuAgents(hsa_agent_t agent, void *data) {
}
// Internal version,
hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
hipError_t ihipCtx_t::getProperties(hipDeviceProp_t* prop)
{
hipError_t e = hipSuccess;
hsa_status_t err;
@@ -881,7 +911,7 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
// Implement "default" stream syncronization
// This waits for all other streams to drain before continuing.
// If waitOnSelf is set, this additionally waits for the default stream to empty.
void ihipDevice_t::locked_syncDefaultStream(bool waitOnSelf)
void ihipCtx_t::locked_syncDefaultStream(bool waitOnSelf)
{
LockedAccessor_DeviceCrit_t crit(_criticalData);
@@ -904,7 +934,7 @@ void ihipDevice_t::locked_syncDefaultStream(bool waitOnSelf)
}
//---
void ihipDevice_t::locked_addStream(ihipStream_t *s)
void ihipCtx_t::locked_addStream(ihipStream_t *s)
{
LockedAccessor_DeviceCrit_t crit(_criticalData);
@@ -912,7 +942,7 @@ void ihipDevice_t::locked_addStream(ihipStream_t *s)
}
//---
void ihipDevice_t::locked_removeStream(ihipStream_t *s)
void ihipCtx_t::locked_removeStream(ihipStream_t *s)
{
LockedAccessor_DeviceCrit_t crit(_criticalData);
@@ -922,7 +952,7 @@ void ihipDevice_t::locked_removeStream(ihipStream_t *s)
//---
//Heavyweight synchronization that waits on all streams, ignoring hipStreamNonBlocking flag.
void ihipDevice_t::locked_waitAllStreams()
void ihipCtx_t::locked_waitAllStreams()
{
LockedAccessor_DeviceCrit_t crit(_criticalData);
@@ -1115,7 +1145,7 @@ void ihipInit()
throw ihipException(hipErrorRuntimeOther);
}
g_devices = new ihipDevice_t[deviceCnt];
g_primaryCtxArray = new ihipCtx_t[deviceCnt];
g_deviceCnt = 0;
for (int i=0; i<accs.size(); i++) {
// check if the device id is included in the HIP_VISIBLE_DEVICES env variable
@@ -1125,7 +1155,7 @@ void ihipInit()
//If device is not in visible devices list, ignore
continue;
}
g_devices[g_deviceCnt].init(g_deviceCnt, deviceCnt, accs[i], hipDeviceMapHost);
g_primaryCtxArray[g_deviceCnt].init(g_deviceCnt, deviceCnt, accs[i], hipDeviceMapHost);
g_deviceCnt++;
}
}
@@ -1139,34 +1169,8 @@ void ihipInit()
}
bool ihipIsValidDevice(unsigned deviceIndex)
{
// deviceIndex is unsigned so always > 0
return (deviceIndex < g_deviceCnt);
}
//---
ihipDevice_t *ihipGetTlsDefaultDevice()
{
// If this is invalid, the TLS state is corrupt.
// This can fire if called before devices are initialized.
// TODO - consider replacing assert with error code
assert (ihipIsValidDevice(tls_defaultDevice));
return &g_devices[tls_defaultDevice];
}
//---
ihipDevice_t *ihipGetDevice(int deviceId)
{
if ((deviceId >= 0) && (deviceId < g_deviceCnt)) {
return &g_devices[deviceId];
} else {
return NULL;
}
}
//---
// Get the stream to use for a command submission.
@@ -1176,7 +1180,7 @@ ihipDevice_t *ihipGetDevice(int deviceId)
hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
{
if (stream == hipStreamNull ) {
ihipDevice_t *device = ihipGetTlsDefaultDevice();
ihipCtx_t *device = ihipGetTlsDefaultCtx();
#ifndef HIP_API_PER_THREAD_DEFAULT_STREAM
device->locked_syncDefaultStream(false);
@@ -1424,7 +1428,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked,
void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType, hsa_agent_t *srcAgent, hsa_agent_t *dstAgent)
{
// current* represents the device associated with the specified stream.
ihipDevice_t *streamDevice = this->getDevice();
ihipCtx_t *streamDevice = this->getDevice();
hsa_agent_t streamAgent = streamDevice->_hsa_agent;
// ROCR runtime logic is :
@@ -1444,7 +1448,7 @@ void ihipStream_t::setAsyncCopyAgents(unsigned kind, ihipCommand_t *commandType,
void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind)
{
ihipDevice_t *device = this->getDevice();
ihipCtx_t *device = this->getDevice();
if (device == NULL) {
throw ihipException(hipErrorInvalidDevice);
}
@@ -1470,7 +1474,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
#if USE_PEER_TO_PEER>=2
// TODO - consider refactor. Do we need to support simul access of enable/disable peers with access?
LockedAccessor_DeviceCrit_t dcrit(device->criticalData());
if (dcrit->isPeer(::getDevice(dstPtrInfo._appId)) && (dcrit->isPeer(::getDevice(srcPtrInfo._appId)))) {
if (dcrit->isPeer(ihipGetDevice(dstPtrInfo._appId)) && (dcrit->isPeer(ihipGetDevice(srcPtrInfo._appId)))) {
copyEngineCanSeeSrcAndDest = true;
}
#endif
@@ -1654,7 +1658,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
{
LockedAccessor_StreamCrit_t crit(_criticalData);
ihipDevice_t *device = this->getDevice();
ihipCtx_t *device = this->getDevice();
if (device == NULL) {
throw ihipException(hipErrorInvalidDevice);
@@ -1740,7 +1744,7 @@ hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc)
{
HIP_INIT_API(deviceId, acc);
ihipDevice_t *d = ihipGetDevice(deviceId);
ihipCtx_t *d = ihipGetDevice(deviceId);
hipError_t err;
if (d == NULL) {
err = hipErrorInvalidDevice;
@@ -1761,7 +1765,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
HIP_INIT_API(stream, av);
if (stream == hipStreamNull ) {
ihipDevice_t *device = ihipGetTlsDefaultDevice();
ihipCtx_t *device = ihipGetTlsDefaultCtx();
stream = device->_default_stream;
}
@@ -1775,7 +1779,7 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
// TODO - describe naming convention. ihip _. No accessors. No early returns from functions. Set status to success at top, only set error codes in implementation. No tabs.
// Caps convention _ or camelCase
// if { }
// Should use ihip* data structures inside code rather than app-facing hip. For example, use ihipDevice_t (rather than hipDevice_t), ihipStream_t (rather than hipStream_t).
// Should use ihip* data structures inside code rather than app-facing hip. For example, use ihipCtx_t (rather than hipDevice_t), ihipStream_t (rather than hipStream_t).
// locked_
// TODO - describe MT strategy
//
+43 -43
Просмотреть файл
@@ -120,18 +120,18 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
hipError_t hip_status = hipSuccess;
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
if (device) {
if (ctx) {
const unsigned am_flags = 0;
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, am_flags);
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_device_index, 0);
hc::am_memtracker_update(*ptr, ctx->_device_index, 0);
{
LockedAccessor_DeviceCrit_t crit(device->criticalData());
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
if (crit->peerCnt()) {
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
}
@@ -152,25 +152,25 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
hipError_t hip_status = hipSuccess;
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
if(device){
if(ctx){
if(flags == hipHostMallocDefault){
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, amHostPinned);
if(sizeBytes < 1 && (*ptr == NULL)){
hip_status = hipErrorMemoryAllocation;
}else{
hc::am_memtracker_update(*ptr, device->_device_index, amHostPinned);
hc::am_memtracker_update(*ptr, ctx->_device_index, amHostPinned);
}
tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr);
} else if(flags & hipHostMallocMapped){
*ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned);
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, amHostPinned);
if(sizeBytes && (*ptr == NULL)){
hip_status = hipErrorMemoryAllocation;
}else{
hc::am_memtracker_update(*ptr, device->_device_index, flags);
hc::am_memtracker_update(*ptr, ctx->_device_index, flags);
{
LockedAccessor_DeviceCrit_t crit(device->criticalData());
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
if (crit->peerCnt()) {
hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
}
@@ -212,19 +212,19 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
*pitch = ((((int)width-1)/128) + 1)*128;
const size_t sizeBytes = (*pitch)*height;
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
//err = hipMalloc(ptr, (*pitch)*height);
if (device) {
if (ctx) {
const unsigned am_flags = 0;
*ptr = hc::am_alloc(sizeBytes, device->_acc, am_flags);
*ptr = hc::am_alloc(sizeBytes, ctx->_acc, am_flags);
if (sizeBytes && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_device_index, 0);
hc::am_memtracker_update(*ptr, ctx->_device_index, 0);
{
LockedAccessor_DeviceCrit_t crit(device->criticalData());
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
@@ -255,7 +255,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
hipError_t hip_status = hipSuccess;
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
*array = (hipArray*)malloc(sizeof(hipArray));
array[0]->width = width;
@@ -265,22 +265,22 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
void ** ptr = &array[0]->data;
if (device) {
if (ctx) {
const unsigned am_flags = 0;
const size_t size = width*height;
switch(desc->f) {
case hipChannelFormatKindSigned:
*ptr = hc::am_alloc(size*sizeof(int), device->_acc, am_flags);
*ptr = hc::am_alloc(size*sizeof(int), ctx->_acc, am_flags);
break;
case hipChannelFormatKindUnsigned:
*ptr = hc::am_alloc(size*sizeof(unsigned int), device->_acc, am_flags);
*ptr = hc::am_alloc(size*sizeof(unsigned int), ctx->_acc, am_flags);
break;
case hipChannelFormatKindFloat:
*ptr = hc::am_alloc(size*sizeof(float), device->_acc, am_flags);
*ptr = hc::am_alloc(size*sizeof(float), ctx->_acc, am_flags);
break;
case hipChannelFormatKindNone:
*ptr = hc::am_alloc(size*sizeof(size_t), device->_acc, am_flags);
*ptr = hc::am_alloc(size*sizeof(size_t), ctx->_acc, am_flags);
break;
default:
hip_status = hipErrorUnknown;
@@ -289,9 +289,9 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
} else {
hc::am_memtracker_update(*ptr, device->_device_index, 0);
hc::am_memtracker_update(*ptr, ctx->_device_index, 0);
{
LockedAccessor_DeviceCrit_t crit(device->criticalData());
LockedAccessor_DeviceCrit_t crit(ctx->criticalData());
if (crit->peerCnt() > 1) { // peerCnt includes self so only call allow_access if other peers involved:
hsa_status_t hsa_status = hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr);
if (hsa_status != HSA_STATUS_SUCCESS) {
@@ -342,7 +342,7 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
hipError_t hip_status = hipSuccess;
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
if(hostPtr == NULL){
return ihipLogStatus(hipErrorInvalidValue);
}
@@ -354,17 +354,17 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
if(am_status == AM_SUCCESS){
hip_status = hipErrorHostMemoryAlreadyRegistered;
}else{
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
if(hostPtr == NULL){
return ihipLogStatus(hipErrorInvalidValue);
}
if(device){
if(ctx){
if(flags == hipHostRegisterDefault || flags == hipHostRegisterPortable || flags == hipHostRegisterMapped){
std::vector<hc::accelerator>vecAcc;
for(int i=0;i<g_deviceCnt;i++){
vecAcc.push_back(g_devices[i]._acc);
vecAcc.push_back(ihipGetDevice(i)->_acc);
}
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
am_status = hc::am_memory_host_lock(ctx->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
if(am_status == AM_SUCCESS){
hip_status = hipSuccess;
}else{
@@ -382,12 +382,12 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
hipError_t hipHostUnregister(void *hostPtr)
{
HIP_INIT_API(hostPtr);
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
hipError_t hip_status = hipSuccess;
if(hostPtr == NULL){
hip_status = hipErrorInvalidValue;
}else{
am_status_t am_status = hc::am_memory_host_unlock(device->_acc, hostPtr);
am_status_t am_status = hc::am_memory_host_unlock(ctx->_acc, hostPtr);
if(am_status != AM_SUCCESS){
hip_status = hipErrorHostMemoryNotRegistered;
}
@@ -406,13 +406,13 @@ hipError_t hipMemcpyToSymbol(const char* symbolName, const void *src, size_t cou
{
return ihipLogStatus(hipErrorInvalidValue);
}
auto device = ihipGetTlsDefaultDevice();
auto ctx = ihipGetTlsDefaultCtx();
//hsa_signal_t depSignal;
//int depSignalCnt = device._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D);
//int depSignalCnt = ctx._default_stream->preCopyCommand(NULL, &depSignal, ihipCommandCopyH2D);
assert(0); // Need to properly synchronize the copy - do something with depSignal if != NULL.
device->_acc.memcpy_symbol(symbolName, (void*) src,count, offset);
ctx->_acc.memcpy_symbol(symbolName, (void*) src,count, offset);
#endif
return ihipLogStatus(hipSuccess);
}
@@ -692,19 +692,19 @@ hipError_t hipMemGetInfo (size_t *free, size_t *total)
hipError_t e = hipSuccess;
ihipDevice_t * hipDevice = ihipGetTlsDefaultDevice();
if (hipDevice) {
ihipCtx_t * ctx = ihipGetTlsDefaultCtx();
if (ctx) {
if (total) {
*total = hipDevice->_props.totalGlobalMem;
*total = ctx->_props.totalGlobalMem;
}
if (free) {
// TODO - replace with kernel-level for reporting free memory:
size_t deviceMemSize, hostMemSize, userMemSize;
hc::am_memtracker_sizeinfo(hipDevice->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
hc::am_memtracker_sizeinfo(ctx->_acc, &deviceMemSize, &hostMemSize, &userMemSize);
printf ("deviceMemSize=%zu\n", deviceMemSize);
*free = hipDevice->_props.totalGlobalMem - deviceMemSize;
*free = ctx->_props.totalGlobalMem - deviceMemSize;
}
} else {
@@ -723,7 +723,7 @@ hipError_t hipFree(void* ptr)
hipError_t hipStatus = hipErrorInvalidDevicePointer;
// Synchronize to ensure all work has finished.
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
if (ptr) {
hc::accelerator acc;
@@ -749,7 +749,7 @@ hipError_t hipHostFree(void* ptr)
HIP_INIT_API(ptr);
// Synchronize to ensure all work has finished.
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
hipError_t hipStatus = hipErrorInvalidValue;
@@ -785,7 +785,7 @@ hipError_t hipFreeArray(hipArray* array)
hipError_t hipStatus = hipErrorInvalidDevicePointer;
// Synchronize to ensure all work has finished.
ihipGetTlsDefaultDevice()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
ihipGetTlsDefaultCtx()->locked_waitAllStreams(); // ignores non-blocking streams, this waits for all activity to finish.
if(array->data) {
hc::accelerator acc;
+2 -2
Просмотреть файл
@@ -66,7 +66,7 @@ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId)
hipError_t err = hipSuccess;
auto thisDevice = ihipGetTlsDefaultDevice();
auto thisDevice = ihipGetTlsDefaultCtx();
auto peerDevice = ihipGetDevice(peerDeviceId);
if ((thisDevice != NULL) && (peerDevice != NULL)) {
#if USE_PEER_TO_PEER>=2
@@ -111,7 +111,7 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags)
if (flags != 0) {
err = hipErrorInvalidValue;
} else {
auto thisDevice = ihipGetTlsDefaultDevice();
auto thisDevice = ihipGetTlsDefaultCtx();
auto peerDevice = ihipGetDevice(peerDeviceId);
if (thisDevice == peerDevice) {
err = hipErrorInvalidDevice; // Can't enable peer access to self.
+11 -11
Просмотреть файл
@@ -30,8 +30,8 @@ THE SOFTWARE.
//---
hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags)
{
ihipDevice_t *device = ihipGetTlsDefaultDevice();
hc::accelerator acc = device->_acc;
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
hc::accelerator acc = ctx->_acc;
// TODO - se try-catch loop to detect memory exception?
//
@@ -39,9 +39,9 @@ hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags)
//Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete:
//This matches CUDA stream behavior:
auto istream = new ihipStream_t(device->_device_index, acc.create_view(), flags);
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
device->locked_addStream(istream);
ctx->locked_addStream(istream);
*stream = istream;
tprintf(DB_SYNC, "hipStreamCreate, stream=%p\n", *stream);
@@ -98,8 +98,8 @@ hipError_t hipStreamSynchronize(hipStream_t stream)
hipError_t e = hipSuccess;
if (stream == NULL) {
ihipDevice_t *device = ihipGetTlsDefaultDevice();
device->locked_syncDefaultStream(true/*waitOnSelf*/);
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
ctx->locked_syncDefaultStream(true/*waitOnSelf*/);
} else {
stream->locked_wait();
e = hipSuccess;
@@ -122,17 +122,17 @@ hipError_t hipStreamDestroy(hipStream_t stream)
//--- Drain the stream:
if (stream == NULL) {
ihipDevice_t *device = ihipGetTlsDefaultDevice();
device->locked_syncDefaultStream(true/*waitOnSelf*/);
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
ctx->locked_syncDefaultStream(true/*waitOnSelf*/);
} else {
stream->locked_wait();
e = hipSuccess;
}
ihipDevice_t *device = stream->getDevice();
ihipCtx_t *ctx = stream->getDevice();
if (device) {
device->locked_removeStream(stream);
if (ctx) {
ctx->locked_removeStream(stream);
delete stream;
} else {
e = hipErrorInvalidResourceHandle;