SWDEV-193956 - [hipclang-vdi-rocm][perf]

~45% to 50% performance drop on the rocBLAS_int8 test

Add support for active waits without blocking the host thread.

Change-Id: Ie7bb48dcafcb4c93d448bf74749b829b626c3578


[ROCm/clr commit: 0fc433e076]
This commit is contained in:
German Andryeyev
2020-03-02 11:39:13 -05:00
والد 123f91240d
کامیت 35dee1ac53
3فایلهای تغییر یافته به همراه32 افزوده شده و 6 حذف شده
@@ -69,6 +69,11 @@ extern void DeviceUnload();
namespace device {
extern const char* BlitSourceCode;
//! Returns the active-wait setting reported by the device obtained from device_()
bool VirtualDevice::ActiveWait() const {
  // Query the device object behind this virtual device for its setting.
  const auto& dev = device_();
  return dev.ActiveWait();
}
}
namespace amd {
@@ -223,6 +228,7 @@ void Device::tearDown() {
Device::Device()
: settings_(nullptr),
online_(true),
activeWait_(false),
blitProgram_(nullptr),
hwDebugMgr_(nullptr),
vaCacheAccess_(nullptr),
@@ -1113,6 +1113,9 @@ class VirtualDevice : public amd::HeapObject {
//! Returns the unique index of this virtual device
uint index() const {
  return index_;
}
//! Returns true if device has active wait setting
bool ActiveWait() const;
private:
//! Disable default copy constructor
VirtualDevice& operator=(const VirtualDevice&);
@@ -1420,13 +1423,25 @@ class Device : public RuntimeObject {
return false;
}
//! Reports whether the active-wait flag is currently set for this device
bool ActiveWait() const {
  return activeWait_ != 0;
}
//! Stores the requested active-wait state for this device
void SetActiveWait(bool state) {
  activeWait_ = state;
}
protected:
//! Enable the specified extension
char* getExtensionString();
device::Info info_; //!< Device info structure
device::Settings* settings_; //!< Device settings
bool online_; //!< The device in online
union {
struct {
uint32_t online_: 1; //!< The device in online
uint32_t activeWait_: 1; //!< If true device requires active wait
};
uint32_t state_; //!< State bit mask
};
BlitProgram* blitProgram_; //!< Blit program info
static AppProfile appProfile_; //!< application profile
HwDebugManager* hwDebugMgr_; //!< Hardware Debug manager
@@ -190,12 +190,17 @@ bool Event::awaitCompletion() {
}
ClPrint(LOG_DEBUG, LOG_WAIT, "waiting for event %p to complete, current status %d", this, status_);
if (command().queue()->vdev()->ActiveWait()) {
while (status_ > CL_COMPLETE) {
amd::Os::yield();
}
} else {
ScopedLock lock(lock_);
ScopedLock lock(lock_);
// Wait until the status becomes CL_COMPLETE or negative.
while (status_ > CL_COMPLETE) {
lock_.wait();
// Wait until the status becomes CL_COMPLETE or negative.
while (status_ > CL_COMPLETE) {
lock_.wait();
}
}
ClPrint(LOG_DEBUG, LOG_WAIT, "event %p wait completed", this);