Protect _stream_id as well.
- move lockedaccessor
- clean up device class.
- add simple ihipDevice constructor.
[ROCm/clr commit: c47b5b04ef]
This commit is contained in:
@@ -275,23 +275,47 @@ class FakeMutex
|
||||
#if STREAM_THREAD_SAFE
|
||||
typedef std::mutex StreamMutex;
|
||||
#else
|
||||
#warning "Stream thread-safe disabled"
|
||||
typedef FakeMutex StreamMutex;
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
//---
|
||||
// Protects access to the member _data with a lock acquired on contruction/destruction.
|
||||
// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
|
||||
template<typename T>
|
||||
class LockedAccessor
|
||||
{
|
||||
public:
|
||||
LockedAccessor(T &criticalData) : _criticalData(&criticalData)
|
||||
{
|
||||
_criticalData->_mutex.lock();
|
||||
};
|
||||
|
||||
~LockedAccessor()
|
||||
{
|
||||
_criticalData->_mutex.unlock();
|
||||
}
|
||||
|
||||
// Syntactic sugar so -> can be used to get the underlying type.
|
||||
T *operator->() { return _criticalData; };
|
||||
|
||||
private:
|
||||
T *_criticalData;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO - move async copy code into stream? Stream->async-copy.
|
||||
// Add PreCopy / PostCopy to manage locks?
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
// Internal stream structure.
|
||||
class ihipStream_t {
|
||||
public:
|
||||
typedef uint64_t SeqNum_t ;
|
||||
|
||||
ihipStream_t(unsigned device_index, hc::accelerator_view av, SeqNum_t id, unsigned int flags);
|
||||
ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags);
|
||||
~ihipStream_t();
|
||||
|
||||
// kind is hipMemcpyKind
|
||||
@@ -379,29 +403,6 @@ struct ihipEvent_t {
|
||||
} ;
|
||||
|
||||
|
||||
//---
|
||||
// Protects access to the member _data with a lock acquired on contruction/destruction.
|
||||
// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
|
||||
template<typename T>
|
||||
class LockedAccessor
|
||||
{
|
||||
public:
|
||||
LockedAccessor(T &data) : _data(&data)
|
||||
{
|
||||
_data->_mutex.lock();
|
||||
};
|
||||
|
||||
~LockedAccessor()
|
||||
{
|
||||
_data->_mutex.unlock();
|
||||
}
|
||||
|
||||
// Syntactic sugar so -> can be used to get the underlying type.
|
||||
T *operator->() { return _data; };
|
||||
|
||||
private:
|
||||
T *_data;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -415,17 +416,22 @@ template <typename MUTEX_TYPE>
|
||||
class ihipDeviceCriticalBase_t
|
||||
{
|
||||
public:
|
||||
ihipDeviceCriticalBase_t() : _stream_id(0) {};
|
||||
friend class LockedAccessor<ihipDeviceCriticalBase_t>;
|
||||
|
||||
std::list<ihipStream_t*> &streams() { return _streams; };
|
||||
const std::list<ihipStream_t*> &const_streams() const { return _streams; };
|
||||
|
||||
// "Allocate" a stream ID:
|
||||
ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; };
|
||||
|
||||
|
||||
private:
|
||||
std::list<ihipStream_t*> _streams; // streams associated with this device.
|
||||
ihipStream_t::SeqNum_t _stream_id;
|
||||
MUTEX_TYPE _mutex;
|
||||
};
|
||||
|
||||
// Typedefs for common definitions.
|
||||
#if DEVICE_THREAD_SAFE
|
||||
typedef ihipDeviceCriticalBase_t<std::mutex> ihipDeviceCritical_t; // Use real mutex
|
||||
#else
|
||||
@@ -445,8 +451,15 @@ typedef LockedAccessor<ihipDeviceCritical_t> Locked_ihipDeviceCritical_t;
|
||||
class ihipDevice_t
|
||||
{
|
||||
public: // Functions:
|
||||
ihipDevice_t() {}; // note: calls constructor for _criticalData
|
||||
void init(unsigned device_index, hc::accelerator &acc, unsigned flags);
|
||||
~ihipDevice_t();
|
||||
|
||||
void locked_addStream(ihipStream_t *s);
|
||||
void locked_removeStream(ihipStream_t *s);
|
||||
void locked_reset();
|
||||
void locked_waitAllStreams();
|
||||
void locked_syncDefaultStream(bool waitOnSelf);
|
||||
|
||||
public: // Data, set at initialization:
|
||||
unsigned _device_index; // index into g_devices.
|
||||
@@ -464,20 +477,13 @@ public: // Data, set at initialization:
|
||||
|
||||
StagingBuffer *_staging_buffer[2]; // one buffer for each direction.
|
||||
|
||||
ihipStream_t::SeqNum_t _stream_id;
|
||||
|
||||
unsigned _device_flags;
|
||||
|
||||
public:
|
||||
void locked_init(unsigned device_index, hc::accelerator acc, unsigned flags);
|
||||
~ihipDevice_t();
|
||||
private:
|
||||
hipError_t getProperties(hipDeviceProp_t* prop);
|
||||
|
||||
void locked_reset();
|
||||
void locked_waitAllStreams();
|
||||
void locked_syncDefaultStream(bool waitOnSelf);
|
||||
|
||||
private:
|
||||
private: // Critical data, protected with locked access:
|
||||
// Members of _protected data MUST be accessed through the LockedAccessor.
|
||||
// Search for LockedAccessor<ihipDeviceCritical_t> for examples; do not access _criticalData directly.
|
||||
ihipDeviceCritical_t _criticalData;
|
||||
|
||||
@@ -124,8 +124,8 @@ ihipSignal_t::~ihipSignal_t()
|
||||
// ihipStream_t:
|
||||
//=================================================================================================
|
||||
//---
|
||||
ihipStream_t::ihipStream_t(unsigned device_index, hc::accelerator_view av, SeqNum_t id, unsigned int flags) :
|
||||
_id(id),
|
||||
ihipStream_t::ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags) :
|
||||
_id(0), // will be set by add function.
|
||||
_av(av),
|
||||
_flags(flags),
|
||||
_device_index(device_index),
|
||||
@@ -402,13 +402,12 @@ void ihipDevice_t::locked_reset()
|
||||
|
||||
|
||||
//---
|
||||
void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsigned flags)
|
||||
void ihipDevice_t::init(unsigned device_index, hc::accelerator &acc, unsigned flags)
|
||||
{
|
||||
_stream_id = 0;
|
||||
|
||||
_device_index = device_index;
|
||||
_device_flags = flags;
|
||||
_acc = acc;
|
||||
|
||||
hsa_agent_t *agent = static_cast<hsa_agent_t*> (acc.get_hsa_agent());
|
||||
if (agent) {
|
||||
int err = hsa_agent_get_info(*agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &_compute_units);
|
||||
@@ -423,15 +422,11 @@ void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsig
|
||||
|
||||
getProperties(&_props);
|
||||
|
||||
{
|
||||
Locked_ihipDeviceCritical_t l(_criticalData);
|
||||
_default_stream = new ihipStream_t(device_index, acc.get_default_view(), _stream_id++, hipStreamDefault);
|
||||
l->streams().push_back(_default_stream);
|
||||
}
|
||||
_default_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault);
|
||||
locked_addStream(_default_stream);
|
||||
|
||||
tprintf(DB_SYNC, "created device with default_stream=%p\n", _default_stream);
|
||||
|
||||
|
||||
hsa_region_t *pinnedHostRegion;
|
||||
pinnedHostRegion = static_cast<hsa_region_t*>(_acc.get_hsa_am_system_region());
|
||||
_staging_buffer[0] = new StagingBuffer(_hsa_agent, *pinnedHostRegion, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS);
|
||||
@@ -440,6 +435,8 @@ void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsig
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
ihipDevice_t::~ihipDevice_t()
|
||||
{
|
||||
if (_default_stream) {
|
||||
@@ -709,6 +706,7 @@ void ihipDevice_t::locked_addStream(ihipStream_t *s)
|
||||
Locked_ihipDeviceCritical_t l(_criticalData);
|
||||
|
||||
l->streams().push_back(s);
|
||||
s->_id = l->incStreamId();
|
||||
}
|
||||
|
||||
//---
|
||||
@@ -895,7 +893,7 @@ void ihipInit()
|
||||
//If device is not in visible devices list, ignore
|
||||
continue;
|
||||
}
|
||||
g_devices[g_deviceCnt].locked_init(g_deviceCnt, accs[i], hipDeviceMapHost);
|
||||
g_devices[g_deviceCnt].init(g_deviceCnt, accs[i], hipDeviceMapHost);
|
||||
g_deviceCnt++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
|
||||
//Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete:
|
||||
//This matches CUDA stream behavior:
|
||||
|
||||
auto istream = new ihipStream_t(device->_device_index, acc.create_view(), device->_stream_id, flags);
|
||||
auto istream = new ihipStream_t(device->_device_index, acc.create_view(), flags);
|
||||
|
||||
device->locked_addStream(istream);
|
||||
|
||||
|
||||
@@ -109,17 +109,17 @@ int main(int argc, char *argv[])
|
||||
|
||||
// Serial version, just call once:
|
||||
if (p_tests & 0x1) {
|
||||
printf ("test 0x1 : serial createThenDestroyStreams(10) \n");
|
||||
printf ("\ntest 0x1 : serial createThenDestroyStreams(10) \n");
|
||||
createThenDestroyStreams(10, 10);
|
||||
};
|
||||
|
||||
if (p_tests & 0x2) {
|
||||
printf ("test 0x2 : serialized multiThread_1(1) \n");
|
||||
printf ("\ntest 0x2 : serialized multiThread_pyramid(1) \n");
|
||||
multiThread_pyramid(true, 10);
|
||||
}
|
||||
|
||||
if (p_tests & 0x4) {
|
||||
printf ("test 0x4 : multiThread_pyramid(1) \n");
|
||||
printf ("\ntest 0x4 : parallel multiThread_pyramid(1) \n");
|
||||
multiThread_pyramid(false, 10);
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ int main(int argc, char *argv[])
|
||||
// }
|
||||
|
||||
if (p_tests & 0x10) {
|
||||
printf ("test 0x10 : multiThread_tiny(1000) \n");
|
||||
printf ("\ntest 0x10 : parallel multiThread_tiny(1000) \n");
|
||||
multiThread_tiny(false, 1000);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user