2
0

SWDEV-366636 - Fix performance drop in TF-RCCL models

Change-Id: Idc845bb0dab858b94b9d2720cae8308cac2e7328


[ROCm/clr commit: f98dcf9d7f]
Este cometimento está contido em:
Anusha GodavarthySurya
2022-12-01 13:32:35 +00:00
cometido por Anusha Godavarthy Surya
ascendente 53073c4657
cometimento 06ffd15060
4 ficheiros modificados com 27 adições e 8 eliminações
+8 -3
Ver ficheiro
@@ -1786,9 +1786,14 @@ class Device : public RuntimeObject {
// Returns the status of HW event, associated with amd::Event
virtual bool IsHwEventReady(
const amd::Event& event, //!< AMD event for HW status validation
bool wait = false //!< If true then forces the event completion
) const {
const amd::Event& event, //!< AMD event for HW status validation
bool wait = false) const { //!< If true then forces the event completion
return false;
};
//! Forced-wait counterpart of IsHwEventReady() for the HW event associated with amd::Event.
//! This base implementation does not inspect the event and always reports false.
//! @param event AMD event for HW status validation
//! @return false in this base implementation
virtual bool IsHwEventReadyForcedWait(const amd::Event& event) const {
  return false;
}
+14 -2
Ver ficheiro
@@ -2710,10 +2710,22 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI
return result;
}
// ================================================================================================
//! Checks the HW event tied to an amd::Event through the forced-wait path of WaitForSignal.
//! @param event AMD event whose HW event (or the HW event of its notify event) is examined
//! @return false when no HW event is attached; otherwise whatever WaitForSignal reports
bool Device::IsHwEventReadyForcedWait(const amd::Event& event) const {
  // When a notify event is attached, its HW event is used instead of the event's own one
  void* hw_handle = nullptr;
  if (event.NotifyEvent() != nullptr) {
    hw_handle = event.NotifyEvent()->HwEvent();
  } else {
    hw_handle = event.HwEvent();
  }

  if (nullptr == hw_handle) {
    ClPrint(amd::LOG_INFO, amd::LOG_SIG, "No HW event");
    return false;
  }

  // Select the WaitForSignal instantiation that applies the active-wait timeout logic
  constexpr bool kUseActiveWaitTimeout = true;
  auto* profiling_signal = static_cast<ProfilingSignal*>(hw_handle);
  // Arguments: active_wait = false, forced_wait = true
  return WaitForSignal<kUseActiveWaitTimeout>(profiling_signal->signal_, false, true);
}
// ================================================================================================
bool Device::IsHwEventReady(const amd::Event& event, bool wait) const {
void* hw_event = (event.NotifyEvent() != nullptr) ?
event.NotifyEvent()->HwEvent() : event.HwEvent();
void* hw_event =
(event.NotifyEvent() != nullptr) ? event.NotifyEvent()->HwEvent() : event.HwEvent();
if (hw_event == nullptr) {
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "No HW event");
return false;
+2
Ver ficheiro
@@ -257,6 +257,7 @@ class NullDevice : public amd::Device {
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
//! Stub implementation: ignores both arguments and always returns false.
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const {
  return false;
}
//! Stub implementation: ignores the event and always returns false.
virtual bool IsHwEventReadyForcedWait(const amd::Event& event) const {
  return false;
}
//! Stub implementation: does nothing; the values behind start and end are left unmodified.
virtual void getHwEventTime(const amd::Event& event, uint64_t* start, uint64_t* end) const {
}
//! Stub implementation: does nothing with the given signal pointer.
virtual void ReleaseGlobalSignal(void* signal) const {
}
@@ -442,6 +443,7 @@ class Device : public NullDevice {
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
//! HW status query for the HW event associated with an amd::Event; defined out of line.
//! @param event AMD event to look at
//! @param wait  optional flag, false by default
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const;
//! Forced-wait variant of the HW event status query; defined out of line, where it
//! calls WaitForSignal with the forced-wait flag set.
//! @param event AMD event to look at
virtual bool IsHwEventReadyForcedWait(const amd::Event& event) const;
//! NOTE(review): definition not visible here - presumably reports the HW start/end
//! times of the event through the start and end out-pointers; confirm.
virtual void getHwEventTime(const amd::Event& event, uint64_t* start, uint64_t* end) const;
//! NOTE(review): definition not visible here - presumably releases the signal passed
//! in as an opaque pointer; confirm.
virtual void ReleaseGlobalSignal(void* signal) const;
+3 -3
Ver ficheiro
@@ -46,10 +46,10 @@ constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
// Active wait timeout in case the same SDMA engine is used again:
// then just wait instead of adding a dependency wait signal.
constexpr static uint64_t kSDMAEngineTimeout = 10;
// Timeout picked by WaitForSignal<true>() when its forced_wait argument is set; the value
// is multiplied by K there.
// NOTE(review): units are not visible here - the comment inside WaitForSignal mentions 10 ms; confirm.
constexpr static uint64_t kForcedTimeout = 10;
template <bool active_wait_timeout = false>
inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool sdma_wait = false) {
inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool forced_wait = false) {
if (hsa_signal_load_relaxed(signal) > 0) {
uint64_t timeout = kTimeout100us;
if (active_wait) {
@@ -57,7 +57,7 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool sd
}
if (active_wait_timeout) {
// If diff engine, wait to 10 ms. Otherwise no wait
timeout = (sdma_wait ? kSDMAEngineTimeout : ROC_ACTIVE_WAIT_TIMEOUT) * K;
timeout = (forced_wait ? kForcedTimeout : ROC_ACTIVE_WAIT_TIMEOUT) * K;
if (timeout == 0) {
return false;
}