From 0b0c55af04fbbb6c5d9249297bd7658a65a446c2 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 26 Mar 2016 11:45:25 -0500 Subject: [PATCH] Protect _stream_id as well. - move lockedaccessor - clean up device class. - add simple ihipDevice constructor. [ROCm/clr commit: c47b5b04ef3f1d897c345270f78df351056d380b] --- .../clr/hipamd/include/hcc_detail/hip_hcc.h | 84 ++++++++++--------- projects/clr/hipamd/src/hip_hcc.cpp | 22 +++-- projects/clr/hipamd/src/hip_stream.cpp | 2 +- .../hipamd/tests/src/hipThreadSafeDevice.cpp | 8 +- 4 files changed, 60 insertions(+), 56 deletions(-) diff --git a/projects/clr/hipamd/include/hcc_detail/hip_hcc.h b/projects/clr/hipamd/include/hcc_detail/hip_hcc.h index 409573d2dc..4e8b513f60 100644 --- a/projects/clr/hipamd/include/hcc_detail/hip_hcc.h +++ b/projects/clr/hipamd/include/hcc_detail/hip_hcc.h @@ -275,23 +275,47 @@ class FakeMutex #if STREAM_THREAD_SAFE typedef std::mutex StreamMutex; #else +#warning "Stream thread-safe disabled" typedef FakeMutex StreamMutex; #endif +// +//--- +// Protects access to the member _data with a lock acquired on contruction/destruction. +// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock) +template +class LockedAccessor +{ +public: + LockedAccessor(T &criticalData) : _criticalData(&criticalData) + { + _criticalData->_mutex.lock(); + }; + + ~LockedAccessor() + { + _criticalData->_mutex.unlock(); + } + + // Syntactic sugar so -> can be used to get the underlying type. + T *operator->() { return _criticalData; }; + +private: + T *_criticalData; +}; + + + + // TODO - move async copy code into stream? Stream->async-copy. // Add PreCopy / PostCopy to manage locks? -// - - - - // Internal stream structure. class ihipStream_t { public: typedef uint64_t SeqNum_t ; - ihipStream_t(unsigned device_index, hc::accelerator_view av, SeqNum_t id, unsigned int flags); + ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags); ~ihipStream_t(); // kind is hipMemcpyKind @@ -379,29 +403,6 @@ struct ihipEvent_t { } ; -//--- -// Protects access to the member _data with a lock acquired on contruction/destruction. -// T must contain a _mutex field which meets the BasicLockable requirements (lock/unlock) -template -class LockedAccessor -{ -public: - LockedAccessor(T &data) : _data(&data) - { - _data->_mutex.lock(); - }; - - ~LockedAccessor() - { - _data->_mutex.unlock(); - } - - // Syntactic sugar so -> can be used to get the underlying type. - T *operator->() { return _data; }; - -private: - T *_data; -}; @@ -415,17 +416,22 @@ template class ihipDeviceCriticalBase_t { public: + ihipDeviceCriticalBase_t() : _stream_id(0) {}; friend class LockedAccessor; std::list &streams() { return _streams; }; const std::list &const_streams() const { return _streams; }; + // "Allocate" a stream ID: + ihipStream_t::SeqNum_t incStreamId() { return _stream_id++; }; + + private: std::list _streams; // streams associated with this device. + ihipStream_t::SeqNum_t _stream_id; MUTEX_TYPE _mutex; }; -// Typedefs for common definitions. #if DEVICE_THREAD_SAFE typedef ihipDeviceCriticalBase_t ihipDeviceCritical_t; // Use real mutex #else @@ -445,8 +451,15 @@ typedef LockedAccessor Locked_ihipDeviceCritical_t; class ihipDevice_t { public: // Functions: + ihipDevice_t() {}; // note: calls constructor for _criticalData + void init(unsigned device_index, hc::accelerator &acc, unsigned flags); + ~ihipDevice_t(); + void locked_addStream(ihipStream_t *s); void locked_removeStream(ihipStream_t *s); + void locked_reset(); + void locked_waitAllStreams(); + void locked_syncDefaultStream(bool waitOnSelf); public: // Data, set at initialization: unsigned _device_index; // index into g_devices. @@ -464,20 +477,13 @@ public: // Data, set at initialization: StagingBuffer *_staging_buffer[2]; // one buffer for each direction. - ihipStream_t::SeqNum_t _stream_id; unsigned _device_flags; -public: - void locked_init(unsigned device_index, hc::accelerator acc, unsigned flags); - ~ihipDevice_t(); +private: hipError_t getProperties(hipDeviceProp_t* prop); - void locked_reset(); - void locked_waitAllStreams(); - void locked_syncDefaultStream(bool waitOnSelf); - -private: +private: // Critical data, protected with locked access: // Members of _protected data MUST be accessed through the LockedAccessor. // Search for LockedAccessor for examples; do not access _criticalData directly. ihipDeviceCritical_t _criticalData; diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index cb216919e2..ce136addd9 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -124,8 +124,8 @@ ihipSignal_t::~ihipSignal_t() // ihipStream_t: //================================================================================================= //--- -ihipStream_t::ihipStream_t(unsigned device_index, hc::accelerator_view av, SeqNum_t id, unsigned int flags) : - _id(id), +ihipStream_t::ihipStream_t(unsigned device_index, hc::accelerator_view av, unsigned int flags) : + _id(0), // will be set by add function. _av(av), _flags(flags), _device_index(device_index), @@ -402,13 +402,12 @@ void ihipDevice_t::locked_reset() //--- -void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsigned flags) +void ihipDevice_t::init(unsigned device_index, hc::accelerator &acc, unsigned flags) { - _stream_id = 0; - _device_index = device_index; _device_flags = flags; _acc = acc; + hsa_agent_t *agent = static_cast (acc.get_hsa_agent()); if (agent) { int err = hsa_agent_get_info(*agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &_compute_units); @@ -423,15 +422,11 @@ void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsig getProperties(&_props); - { - Locked_ihipDeviceCritical_t l(_criticalData); - _default_stream = new ihipStream_t(device_index, acc.get_default_view(), _stream_id++, hipStreamDefault); - l->streams().push_back(_default_stream); - } + _default_stream = new ihipStream_t(device_index, acc.get_default_view(), hipStreamDefault); + locked_addStream(_default_stream); tprintf(DB_SYNC, "created device with default_stream=%p\n", _default_stream); - hsa_region_t *pinnedHostRegion; pinnedHostRegion = static_cast(_acc.get_hsa_am_system_region()); _staging_buffer[0] = new StagingBuffer(_hsa_agent, *pinnedHostRegion, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS); @@ -440,6 +435,8 @@ void ihipDevice_t::locked_init(unsigned device_index, hc::accelerator acc, unsig }; + + ihipDevice_t::~ihipDevice_t() { if (_default_stream) { @@ -709,6 +706,7 @@ void ihipDevice_t::locked_addStream(ihipStream_t *s) Locked_ihipDeviceCritical_t l(_criticalData); l->streams().push_back(s); + s->_id = l->incStreamId(); } //--- @@ -895,7 +893,7 @@ void ihipInit() //If device is not in visible devices list, ignore continue; } - g_devices[g_deviceCnt].locked_init(g_deviceCnt, accs[i], hipDeviceMapHost); + g_devices[g_deviceCnt].init(g_deviceCnt, accs[i], hipDeviceMapHost); g_deviceCnt++; } } diff --git a/projects/clr/hipamd/src/hip_stream.cpp b/projects/clr/hipamd/src/hip_stream.cpp index 784fa9224c..9e0d32e971 100644 --- a/projects/clr/hipamd/src/hip_stream.cpp +++ b/projects/clr/hipamd/src/hip_stream.cpp @@ -41,7 +41,7 @@ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) //Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete: //This matches CUDA stream behavior: - auto istream = new ihipStream_t(device->_device_index, acc.create_view(), device->_stream_id, flags); + auto istream = new ihipStream_t(device->_device_index, acc.create_view(), flags); device->locked_addStream(istream); diff --git a/projects/clr/hipamd/tests/src/hipThreadSafeDevice.cpp b/projects/clr/hipamd/tests/src/hipThreadSafeDevice.cpp index fca994cba8..0534d6fbae 100644 --- a/projects/clr/hipamd/tests/src/hipThreadSafeDevice.cpp +++ b/projects/clr/hipamd/tests/src/hipThreadSafeDevice.cpp @@ -109,17 +109,17 @@ int main(int argc, char *argv[]) // Serial version, just call once: if (p_tests & 0x1) { - printf ("test 0x1 : serial createThenDestroyStreams(10) \n"); + printf ("\ntest 0x1 : serial createThenDestroyStreams(10) \n"); createThenDestroyStreams(10, 10); }; if (p_tests & 0x2) { - printf ("test 0x2 : serialized multiThread_1(1) \n"); + printf ("\ntest 0x2 : serialized multiThread_pyramid(1) \n"); multiThread_pyramid(true, 10); } if (p_tests & 0x4) { - printf ("test 0x4 : multiThread_pyramid(1) \n"); + printf ("\ntest 0x4 : parallel multiThread_pyramid(1) \n"); multiThread_pyramid(false, 10); } @@ -129,7 +129,7 @@ int main(int argc, char *argv[]) // } if (p_tests & 0x10) { - printf ("test 0x10 : multiThread_tiny(1000) \n"); + printf ("\ntest 0x10 : parallel multiThread_tiny(1000) \n"); multiThread_tiny(false, 1000); }