SWDEV-193956 - [hipclang-vdi-rocm][perf]
~45% to 50% performance drop on the rocBLAS_int8 test
Add support for active waits without blocking the host thread.
Change-Id: Ie7bb48dcafcb4c93d448bf74749b829b626c3578
[ROCm/clr commit: 0fc433e076]
This commit is contained in:
@@ -69,6 +69,11 @@ extern void DeviceUnload();
|
||||
|
||||
namespace device {
|
||||
extern const char* BlitSourceCode;
|
||||
|
||||
//! Returns the active wait setting by forwarding to ActiveWait() on the object returned by device_()
bool VirtualDevice::ActiveWait() const {
|
||||
return device_().ActiveWait();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace amd {
|
||||
@@ -223,6 +228,7 @@ void Device::tearDown() {
|
||||
Device::Device()
|
||||
: settings_(nullptr),
|
||||
online_(true),
|
||||
activeWait_(false),
|
||||
blitProgram_(nullptr),
|
||||
hwDebugMgr_(nullptr),
|
||||
vaCacheAccess_(nullptr),
|
||||
|
||||
@@ -1113,6 +1113,9 @@ class VirtualDevice : public amd::HeapObject {
|
||||
//! Returns the virtual device unique index
|
||||
uint index() const { return index_; }
|
||||
|
||||
//! Returns true if device has active wait setting
|
||||
bool ActiveWait() const;
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
VirtualDevice& operator=(const VirtualDevice&);
|
||||
@@ -1420,13 +1423,25 @@ class Device : public RuntimeObject {
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Returns the active wait state for this device (the current value of activeWait_)
|
||||
bool ActiveWait() const { return activeWait_; }
|
||||
|
||||
//! Sets the active wait state for this device (stored in activeWait_)
void SetActiveWait(bool state) { activeWait_ = state; }
|
||||
|
||||
protected:
|
||||
//! Enable the specified extension
|
||||
char* getExtensionString();
|
||||
|
||||
device::Info info_; //!< Device info structure
|
||||
device::Settings* settings_; //!< Device settings
|
||||
bool online_; //!< The device is online
|
||||
//! Device state flags: each flag is a one-bit field, and state_ overlays them as a single 32-bit mask
union {
|
||||
struct {
|
||||
uint32_t online_: 1; //!< The device is online
|
||||
uint32_t activeWait_: 1; //!< If true device requires active wait
|
||||
};
|
||||
uint32_t state_; //!< State bit mask
|
||||
};
|
||||
|
||||
BlitProgram* blitProgram_; //!< Blit program info
|
||||
static AppProfile appProfile_; //!< application profile
|
||||
HwDebugManager* hwDebugMgr_; //!< Hardware Debug manager
|
||||
|
||||
@@ -190,12 +190,17 @@ bool Event::awaitCompletion() {
|
||||
}
|
||||
|
||||
ClPrint(LOG_DEBUG, LOG_WAIT, "waiting for event %p to complete, current status %d", this, status_);
|
||||
if (command().queue()->vdev()->ActiveWait()) {
|
||||
while (status_ > CL_COMPLETE) {
|
||||
amd::Os::yield();
|
||||
}
|
||||
} else {
|
||||
ScopedLock lock(lock_);
|
||||
|
||||
ScopedLock lock(lock_);
|
||||
|
||||
// Wait until the status becomes CL_COMPLETE or negative.
|
||||
while (status_ > CL_COMPLETE) {
|
||||
lock_.wait();
|
||||
// Wait until the status becomes CL_COMPLETE or negative.
|
||||
while (status_ > CL_COMPLETE) {
|
||||
lock_.wait();
|
||||
}
|
||||
}
|
||||
|
||||
ClPrint(LOG_DEBUG, LOG_WAIT, "event %p wait completed", this);
|
||||
|
||||
Reference in new issue
Block a user