Use mwaitx when busy-waiting signals
Use mwaitx instructions when busy-waiting for signals to reduce CPU energy usage. This can be disabled by setting HSA_ENABLE_MWAITX=0. Change-Id: Ic207895a491b2bf6dacba47ef0921df3faad5b5a
Этот коммит содержится в:
@@ -129,7 +129,7 @@ target_include_directories( ${CORE_RUNTIME_TARGET}
|
||||
set_property(TARGET ${CORE_RUNTIME_TARGET} PROPERTY INSTALL_RPATH "$ORIGIN;$ORIGIN/../../lib;$ORIGIN/../../lib64;$ORIGIN/../lib64" )
|
||||
|
||||
## ------------------------- Linux Compiler and Linker options -------------------------
|
||||
set ( HSA_CXX_FLAGS ${HSA_COMMON_CXX_FLAGS} -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=missing-braces -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-function )
|
||||
set ( HSA_CXX_FLAGS ${HSA_COMMON_CXX_FLAGS} -fexceptions -fno-rtti -fvisibility=hidden -Wno-error=missing-braces -Wno-error=sign-compare -Wno-sign-compare -Wno-write-strings -Wno-conversion-null -fno-math-errno -fno-threadsafe-statics -fmerge-all-constants -fms-extensions -Wno-error=comment -Wno-comment -Wno-error=pointer-arith -Wno-pointer-arith -Wno-error=unused-variable -Wno-error=unused-function -mmwaitx )
|
||||
|
||||
## Extra image settings - audit!
|
||||
set ( HSA_CXX_FLAGS ${HSA_CXX_FLAGS} -Wno-deprecated-declarations )
|
||||
|
||||
@@ -86,6 +86,7 @@ namespace AMD {
|
||||
|
||||
namespace core {
|
||||
extern bool g_use_interrupt_wait;
|
||||
extern bool g_use_mwaitx;
|
||||
|
||||
/// @brief Runtime class provides the following functions:
|
||||
/// - open and close connection to kernel driver.
|
||||
|
||||
@@ -42,6 +42,9 @@
|
||||
|
||||
#include "core/inc/default_signal.h"
|
||||
#include "core/util/timer.h"
|
||||
#include <mwaitxintrin.h>
|
||||
|
||||
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
|
||||
|
||||
namespace rocr {
|
||||
namespace core {
|
||||
@@ -100,6 +103,8 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
|
||||
timer::duration_from_seconds<timer::fast_clock::duration>(
|
||||
double(timeout) / double(hsa_freq));
|
||||
|
||||
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
|
||||
|
||||
while (true) {
|
||||
if (!IsValid()) return 0;
|
||||
|
||||
@@ -132,8 +137,12 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
|
||||
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
|
||||
return hsa_signal_value_t(value);
|
||||
}
|
||||
if (time - start_time > kMaxElapsed) {
|
||||
|
||||
if (time - start_time > kMaxElapsed)
|
||||
os::uSleep(20);
|
||||
else if (g_use_mwaitx) {
|
||||
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE); // 60000 timer ticks. NOTE(review): 60000 / 1.5 GHz is ~40us (~20us at 3 GHz) if the timer ticks at the CPU clock rate - confirm the intended timeout.
|
||||
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,6 +44,9 @@
|
||||
#include "core/inc/runtime.h"
|
||||
#include "core/util/timer.h"
|
||||
#include "core/util/locks.h"
|
||||
#include <mwaitxintrin.h>
|
||||
|
||||
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
|
||||
|
||||
namespace rocr {
|
||||
namespace core {
|
||||
@@ -162,6 +165,8 @@ hsa_signal_value_t InterruptSignal::WaitRelaxed(
|
||||
double(timeout) / double(hsa_freq));
|
||||
|
||||
bool condition_met = false;
|
||||
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
|
||||
|
||||
while (true) {
|
||||
if (!IsValid()) return 0;
|
||||
|
||||
@@ -194,13 +199,21 @@ hsa_signal_value_t InterruptSignal::WaitRelaxed(
|
||||
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
|
||||
return hsa_signal_value_t(value);
|
||||
}
|
||||
|
||||
|
||||
if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
|
||||
if (g_use_mwaitx) {
|
||||
_mm_mwaitx(0, 0, 0);
|
||||
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (time - start_time < kMaxElapsed) {
|
||||
// os::uSleep(20);
|
||||
// os::uSleep(20);
|
||||
if (g_use_mwaitx) {
|
||||
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE);
|
||||
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -71,6 +71,7 @@ const char rocrbuildid[] __attribute__((used)) = "ROCR BUILD ID: " STRING(ROCR_B
|
||||
namespace rocr {
|
||||
namespace core {
|
||||
bool g_use_interrupt_wait = true;
|
||||
bool g_use_mwaitx = true;
|
||||
|
||||
Runtime* Runtime::runtime_singleton_ = NULL;
|
||||
|
||||
@@ -680,6 +681,10 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
|
||||
*(bool*)value = ret;
|
||||
break;
|
||||
}
|
||||
case HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED: {
|
||||
*((bool*)value) = g_use_mwaitx;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -1405,6 +1410,7 @@ hsa_status_t Runtime::Load() {
|
||||
|
||||
flag_.Refresh();
|
||||
g_use_interrupt_wait = flag_.enable_interrupt();
|
||||
g_use_mwaitx = flag_.check_mwaitx(cpuinfo.mwaitx);
|
||||
|
||||
if (!AMD::Load()) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
|
||||
@@ -245,6 +245,7 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
|
||||
|
||||
bool condition_met = false;
|
||||
while (true) {
|
||||
// Cannot mwaitx - polling multiple signals
|
||||
for (uint32_t i = 0; i < signal_count; i++) {
|
||||
if (!signals[i]->IsValid()) return uint32_t(-1);
|
||||
|
||||
|
||||
@@ -163,6 +163,9 @@ class Flag {
|
||||
var = os::GetEnvVar("HSA_IMAGE_PRINT_SRD");
|
||||
image_print_srd_ = (var == "1") ? true : false;
|
||||
|
||||
var = os::GetEnvVar("HSA_ENABLE_MWAITX");
|
||||
enable_mwaitx_ = (var == "0") ? false : true;
|
||||
|
||||
// Temporary environment variable to disable CPU affinity override
|
||||
// Will either rename to HSA_OVERRIDE_CPU_AFFINITY later or remove completely.
|
||||
var = os::GetEnvVar("HSA_OVERRIDE_CPU_AFFINITY_DEBUG");
|
||||
@@ -224,6 +227,12 @@ class Flag {
|
||||
|
||||
bool image_print_srd() const { return image_print_srd_; }
|
||||
|
||||
bool check_mwaitx(bool mwaitx_supported) {
|
||||
if (enable_mwaitx_ && !mwaitx_supported) enable_mwaitx_ = false;
|
||||
|
||||
return enable_mwaitx_;
|
||||
}
|
||||
|
||||
XNACK_REQUEST xnack() const { return xnack_; }
|
||||
|
||||
bool debug() const { return debug_; }
|
||||
@@ -266,6 +275,7 @@ class Flag {
|
||||
bool discover_copy_agents_;
|
||||
bool override_cpu_affinity_;
|
||||
bool image_print_srd_;
|
||||
bool enable_mwaitx_;
|
||||
|
||||
SDMA_OVERRIDE enable_sdma_;
|
||||
|
||||
|
||||
@@ -482,19 +482,24 @@ typedef enum {
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
|
||||
/**
|
||||
* Returns true if hsa_amd_svm_* APIs are supported by the driver. The type of
|
||||
* this attribute is bool.
|
||||
*/
|
||||
* Returns true if hsa_amd_svm_* APIs are supported by the driver. The type of
|
||||
* this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201,
|
||||
// TODO: Should this be per Agent?
|
||||
/**
|
||||
* Returns true if all Agents have access to system allocated memory (such as
|
||||
* that allocated by mmap, malloc, or new) by default.
|
||||
* If false then system allocated memory may only be made SVM accessible to
|
||||
* an Agent by declaration of accessibility with hsa_amd_svm_set_attributes.
|
||||
* The type of this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
|
||||
* Returns true if all Agents have access to system allocated memory (such as
|
||||
* that allocated by mmap, malloc, or new) by default.
|
||||
* If false then system allocated memory may only be made SVM accessible to
|
||||
* an Agent by declaration of accessibility with hsa_amd_svm_set_attributes.
|
||||
* The type of this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202,
|
||||
/**
|
||||
* Returns true if mwaitx is enabled on this system.
|
||||
* The type of this attribute is bool.
|
||||
*/
|
||||
HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED = 0x203
|
||||
} hsa_system_info_t;
|
||||
|
||||
/**
|
||||
|
||||
Ссылка в новой задаче
Block a user