Protect _stream_id as well.

- move lockedaccessor
- clean up device class.
- add simple ihipDevice constructor.


[ROCm/clr commit: c47b5b04ef]
This commit is contained in:
Ben Sander
2016-03-26 11:45:25 -05:00
rodzic e16e848d55
commit 0b0c55af04
4 zmienionych plików z 60 dodań i 56 usunięć
@@ -275,23 +275,47 @@ class FakeMutex
#if STREAM_THREAD_SAFE
typedef std::mutex StreamMutex;
#else
#warning "Stream thread-safe disabled"
typedef FakeMutex StreamMutex;
#endif
//
//---
// Protects access to the member _data with a lock acquired on contruction/destruction.
// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
template<typename T>
class LockedAccessor
{
public:
LockedAccessor(T &criticalData) : _criticalData(&criticalData)
{
_criticalData->_mutex.lock();
};
~LockedAccessor()
{
_criticalData->_mutex.unlock();
}
// Syntactic sugar so -> can be used to get the underlying type.
T *operator->() { return _criticalData; };
private:
T *_criticalData;
};
// TODO - move async copy code into stream? Stream->async-copy.
// Add PreCopy / PostCopy to manage locks?
//
// Internal stream structure.
class ihipStream_t {
public:
typedef uint64_t SeqNum_t ;
ihipStream_t(unsigned device_index, hc::accelerator_view av, SeqNum_t id, unsigned int flags);
ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags);
~ihipStream_t();
// kind is hipMemcpyKind
@@ -379,29 +403,6 @@ struct ihipEvent_t {
} ;
//---
// Protects access to the member _data with a lock acquired on contruction/destruction.
// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock)
template<typename T>
class LockedAccessor
{
public:
LockedAccessor(T &data) : _data(&data)
{
_data->_mutex.lock();
};
~LockedAccessor()
{
_data->_mutex.unlock();
}
// Syntactic sugar so -> can be used to get the underlying type.
T *operator->() { return _data; };
private:
T *_data;
};
@@ -415,17 +416,22 @@ template <typename MUTEX_TYPE>
class ihipDeviceCriticalBase_t
{
public:
ihipDeviceCriticalBase_t() : _stream_id(0) {};
friend class LockedAccessor<ihipDeviceCriticalBase_t>;
std::list<ihipStream_t*> &streams() { return _streams; };
const std::list<ihipStream_t*> &const_streams() const { return _streams; };
// "Allocate" a stream ID:
ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; };
private:
std::list<ihipStream_t*> _streams; // streams associated with this device.
ihipStream_t::SeqNum_t _stream_id;
MUTEX_TYPE _mutex;
};
// Typedefs for common definitions.
#if DEVICE_THREAD_SAFE
typedef ihipDeviceCriticalBase_t<std::mutex> ihipDeviceCritical_t; // Use real mutex
#else
@@ -445,8 +451,15 @@ typedef LockedAccessor<ihipDeviceCritical_t> Locked_ihipDeviceCritical_t;
class ihipDevice_t
{
public: // Functions:
ihipDevice_t() {}; // note: calls constructor for _criticalData
void init(unsigned device_index, hc::accelerator &acc, unsigned flags);
~ihipDevice_t();
void locked_addStream(ihipStream_t *s);
void locked_removeStream(ihipStream_t *s);
void locked_reset();
void locked_waitAllStreams();
void locked_syncDefaultStream(bool waitOnSelf);
public: // Data, set at initialization:
unsigned _device_index; // index into g_devices.
@@ -464,20 +477,13 @@ public: // Data, set at initialization:
StagingBuffer *_staging_buffer[2]; // one buffer for each direction.
ihipStream_t::SeqNum_t _stream_id;
unsigned _device_flags;
public:
void locked_init(unsigned device_index, hc::accelerator acc, unsigned flags);
~ihipDevice_t();
private:
hipError_t getProperties(hipDeviceProp_t* prop);
void locked_reset();
void locked_waitAllStreams();
void locked_syncDefaultStream(bool waitOnSelf);
private:
private: // Critical data, protected with locked access:
// Members of _protected data MUST be accessed through the LockedAccessor.
// Search for LockedAccessor<ihipDeviceCritical_t> for examples; do not access _criticalData directly.
ihipDeviceCritical_t _criticalData;
+10 -12
Wyświetl plik
@@ -124,8 +124,8 @@ ihipSignal_t::~ihipSignal_t()
// ihipStream_t:
//=================================================================================================
//---
ihipStream_t::ihipStream_t(unsigned device_index, hc::accelerator_view av, SeqNum_t id, unsigned int flags) :
_id(id),
ihipStream_t::ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags) :
_id(0), // will be set by add function.
_av(av),
_flags(flags),
_device_index(device_index),
@@ -402,13 +402,12 @@ void ihipDevice_t::locked_reset()
//---
void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsigned flags)
void ihipDevice_t::init(unsigned device_index, hc::accelerator &acc, unsigned flags)
{
_stream_id = 0;
_device_index = device_index;
_device_flags = flags;
_acc = acc;
hsa_agent_t *agent = static_cast<hsa_agent_t*> (acc.get_hsa_agent());
if (agent) {
int err = hsa_agent_get_info(*agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &_compute_units);
@@ -423,15 +422,11 @@ void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsig
getProperties(&_props);
{
Locked_ihipDeviceCritical_t l(_criticalData);
_default_stream = new ihipStream_t(device_index, acc.get_default_view(), _stream_id++, hipStreamDefault);
l->streams().push_back(_default_stream);
}
_default_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault);
locked_addStream(_default_stream);
tprintf(DB_SYNC, "created device with default_stream=%p\n", _default_stream);
hsa_region_t *pinnedHostRegion;
pinnedHostRegion = static_cast<hsa_region_t*>(_acc.get_hsa_am_system_region());
_staging_buffer[0] = new StagingBuffer(_hsa_agent, *pinnedHostRegion, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS);
@@ -440,6 +435,8 @@ void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsig
};
ihipDevice_t::~ihipDevice_t()
{
if (_default_stream) {
@@ -709,6 +706,7 @@ void ihipDevice_t::locked_addStream(ihipStream_t *s)
Locked_ihipDeviceCritical_t l(_criticalData);
l->streams().push_back(s);
s->_id = l->incStreamId();
}
//---
@@ -895,7 +893,7 @@ void ihipInit()
//If device is not in visible devices list, ignore
continue;
}
g_devices[g_deviceCnt].locked_init(g_deviceCnt, accs[i], hipDeviceMapHost);
g_devices[g_deviceCnt].init(g_deviceCnt, accs[i], hipDeviceMapHost);
g_deviceCnt++;
}
}
+1 -1
Wyświetl plik
@@ -41,7 +41,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags)
//Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete:
//This matches CUDA stream behavior:
auto istream = new ihipStream_t(device->_device_index, acc.create_view(), device->_stream_id, flags);
auto istream = new ihipStream_t(device->_device_index, acc.create_view(), flags);
device->locked_addStream(istream);
@@ -109,17 +109,17 @@ int main(int argc, char *argv[])
// Serial version, just call once:
if (p_tests & 0x1) {
printf ("test 0x1 : serial createThenDestroyStreams(10) \n");
printf ("\ntest 0x1 : serial createThenDestroyStreams(10) \n");
createThenDestroyStreams(10, 10);
};
if (p_tests & 0x2) {
printf ("test 0x2 : serialized multiThread_1(1) \n");
printf ("\ntest 0x2 : serialized multiThread_pyramid(1) \n");
multiThread_pyramid(true, 10);
}
if (p_tests & 0x4) {
printf ("test 0x4 : multiThread_pyramid(1) \n");
printf ("\ntest 0x4 : parallel multiThread_pyramid(1) \n");
multiThread_pyramid(false, 10);
}
@@ -129,7 +129,7 @@ int main(int argc, char *argv[])
// }
if (p_tests & 0x10) {
printf ("test 0x10 : multiThread_tiny(1000) \n");
printf ("\ntest 0x10 : parallel multiThread_tiny(1000) \n");
multiThread_tiny(false, 1000);
}