Use mwaitx when busy-waiting signals

Use the mwaitx instruction when busy-waiting on signals to reduce CPU
energy usage.
This can be disabled by setting the environment variable HSA_ENABLE_MWAITX=0.

Change-Id: Ic207895a491b2bf6dacba47ef0921df3faad5b5a
Этот коммит содержится в:
David Yat Sin
2022-11-15 17:21:59 +00:00
родитель 0ed1568afc
Коммит cc48dfdbff
8 изменённых файлов: 59 добавлений и 14 удалений
+1 -1
Просмотреть файл
@@ -129,7 +129,7 @@ target_include_directories( ${CORE_RUNTIME_TARGET}
set_property(TARGET ${CORE_RUNTIME_TARGET} PROPERTY INSTALL_RPATH "$ORIGIN;$ORIGIN/../../lib;$ORIGIN/../../lib64;$ORIGIN/../lib64" )
## ------------------------- Linux Compiler and Linker options -------------------------
set ( HSA_CXX_FLAGS ${HSA_COMMON_CXX_FLAGS} -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=missing-braces -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-function )
set ( HSA_CXX_FLAGS ${HSA_COMMON_CXX_FLAGS} -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=missing-braces -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-function -mmwaitx )
## Extra image settings - audit!
set ( HSA_CXX_FLAGS ${HSA_CXX_FLAGS} -Wno-deprecated-declarations )
+1
Просмотреть файл
@@ -86,6 +86,7 @@ namespace AMD {
namespace core {
extern bool g_use_interrupt_wait;
extern bool g_use_mwaitx;
/// @brief Runtime class provides the following functions:
/// - open and close connection to kernel driver.
+10 -1
Просмотреть файл
@@ -42,6 +42,9 @@
#include "core/inc/default_signal.h"
#include "core/util/timer.h"
#include <mwaitxintrin.h>
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
namespace rocr {
namespace core {
@@ -100,6 +103,8 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
timer::duration_from_seconds<timer::fast_clock::duration>(
double(timeout) / double(hsa_freq));
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
while (true) {
if (!IsValid()) return 0;
@@ -132,8 +137,12 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
return hsa_signal_value_t(value);
}
if (time - start_time > kMaxElapsed) {
if (time - start_time > kMaxElapsed)
os::uSleep(20);
else if (g_use_mwaitx) {
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE); // 60000 ~20us on a 1.5Ghz CPU
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
}
}
}
+15 -2
Просмотреть файл
@@ -44,6 +44,9 @@
#include "core/inc/runtime.h"
#include "core/util/timer.h"
#include "core/util/locks.h"
#include <mwaitxintrin.h>
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
namespace rocr {
namespace core {
@@ -162,6 +165,8 @@ hsa_signal_value_t InterruptSignal::WaitRelaxed(
double(timeout) / double(hsa_freq));
bool condition_met = false;
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
while (true) {
if (!IsValid()) return 0;
@@ -194,13 +199,21 @@ hsa_signal_value_t InterruptSignal::WaitRelaxed(
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
return hsa_signal_value_t(value);
}
if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
if (g_use_mwaitx) {
_mm_mwaitx(0, 0, 0);
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
}
continue;
}
if (time - start_time < kMaxElapsed) {
// os::uSleep(20);
// os::uSleep(20);
if (g_use_mwaitx) {
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE);
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
}
continue;
}
+6
Просмотреть файл
@@ -71,6 +71,7 @@ const char rocrbuildid[] __attribute__((used)) = "ROCR BUILD ID: " STRING(ROCR_B
namespace rocr {
namespace core {
bool g_use_interrupt_wait = true;
bool g_use_mwaitx = true;
Runtime* Runtime::runtime_singleton_ = NULL;
@@ -680,6 +681,10 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
*(bool*)value = ret;
break;
}
case HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED: {
*((bool*)value) = g_use_mwaitx;
break;
}
default:
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
@@ -1405,6 +1410,7 @@ hsa_status_t Runtime::Load() {
flag_.Refresh();
g_use_interrupt_wait = flag_.enable_interrupt();
g_use_mwaitx = flag_.check_mwaitx(cpuinfo.mwaitx);
if (!AMD::Load()) {
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
+1
Просмотреть файл
@@ -245,6 +245,7 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
bool condition_met = false;
while (true) {
// Cannot mwaitx - polling multiple signals
for (uint32_t i = 0; i < signal_count; i++) {
if (!signals[i]->IsValid()) return uint32_t(-1);
+10
Просмотреть файл
@@ -163,6 +163,9 @@ class Flag {
var = os::GetEnvVar("HSA_IMAGE_PRINT_SRD");
image_print_srd_ = (var == "1") ? true : false;
var = os::GetEnvVar("HSA_ENABLE_MWAITX");
enable_mwaitx_ = (var == "0") ? false : true;
// Temporary environment variable to disable CPU affinity override
// Will either rename to HSA_OVERRIDE_CPU_AFFINITY later or remove completely.
var = os::GetEnvVar("HSA_OVERRIDE_CPU_AFFINITY_DEBUG");
@@ -224,6 +227,12 @@ class Flag {
bool image_print_srd() const { return image_print_srd_; }
// Resolves the effective mwaitx setting. The stored enable_mwaitx_ preference
// survives only if the caller reports that mwaitx is supported; the resolved
// value is written back to enable_mwaitx_ and returned.
bool check_mwaitx(bool mwaitx_supported) {
  enable_mwaitx_ = enable_mwaitx_ && mwaitx_supported;
  return enable_mwaitx_;
}
XNACK_REQUEST xnack() const { return xnack_; }
bool debug() const { return debug_; }
@@ -266,6 +275,7 @@ class Flag {
bool discover_copy_agents_;
bool override_cpu_affinity_;
bool image_print_srd_;
bool enable_mwaitx_;
SDMA_OVERRIDE enable_sdma_;
+15 -10
Просмотреть файл
@@ -482,19 +482,24 @@ typedef enum {
*/
HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
/**
* Returns true if hsa_amd_svm_* APIs are supported by the driver. The type of
* this attribute is bool.
*/
* Returns true if hsa_amd_svm_* APIs are supported by the driver. The type of
* this attribute is bool.
*/
HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201,
// TODO: Should this be per Agent?
/**
* Returns true if all Agents have access to system allocated memory (such as
* that allocated by mmap, malloc, or new) by default.
* If false then system allocated memory may only be made SVM accessible to
* an Agent by declaration of accessibility with hsa_amd_svm_set_attributes.
* The type of this attribute is bool.
*/
HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
* Returns true if all Agents have access to system allocated memory (such as
* that allocated by mmap, malloc, or new) by default.
* If false then system allocated memory may only be made SVM accessible to
* an Agent by declaration of accessibility with hsa_amd_svm_set_attributes.
* The type of this attribute is bool.
*/
HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202,
/**
* Returns true if mwaitx is enabled on this system.
* The type of this attribute is bool.
*/
HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203
} hsa_system_info_t;
/**