From 11541cc28343ea7aed2d4ea6a03a7a024d12671a Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 11 Apr 2023 21:32:08 +0000 Subject: [PATCH] Add env var to override SRAM ECC Add HSA_ENABLE_SRAMECC environment variable that can be used to override SRAM ECC mode reported by KFD Change-Id: I2b95511820a2d3d146a76b03070659c0695b61fd [ROCm/ROCR-Runtime commit: a180c9ee78e729a2cd5225392504cb2db52d4b9d] --- .../hsa-runtime/core/runtime/amd_gpu_agent.cpp | 14 ++++++++++++-- .../runtime/hsa-runtime/core/util/flag.h | 9 +++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 00547d16d8..bd07fadab4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -124,8 +124,18 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xna rocr::core::IsaFeature sramecc = rocr::core::IsaFeature::Unsupported; if (isa_base->IsSrameccSupported()) { - sramecc = node_props.Capability.ui32.SRAM_EDCSupport == 1 ? core::IsaFeature::Enabled - : core::IsaFeature::Disabled; + switch (core::Runtime::runtime_singleton_->flag().sramecc_enable()) { + case Flag::SRAMECC_DISABLED: + sramecc = core::IsaFeature::Disabled; + break; + case Flag::SRAMECC_ENABLED: + sramecc = core::IsaFeature::Enabled; + break; + case Flag::SRAMECC_DEFAULT: + sramecc = node_props.Capability.ui32.SRAM_EDCSupport == 1 ? 
core::IsaFeature::Enabled + : core::IsaFeature::Disabled; + break; + } } rocr::core::IsaFeature xnack = rocr::core::IsaFeature::Unsupported; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h index 5c5e8f3101..2f7f726902 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h @@ -57,6 +57,7 @@ namespace rocr { class Flag { public: enum SDMA_OVERRIDE { SDMA_DISABLE, SDMA_ENABLE, SDMA_DEFAULT }; + enum SRAMECC_ENABLE { SRAMECC_DISABLED, SRAMECC_ENABLED, SRAMECC_DEFAULT }; // The values are meaningful and chosen to satisfy the thunk API. enum XNACK_REQUEST { XNACK_DISABLE = 0, XNACK_ENABLE = 1, XNACK_UNCHANGED = 2 }; @@ -175,6 +176,10 @@ class Flag { var = os::GetEnvVar("HSA_SVM_PROFILE"); svm_profile_ = var; + var = os::GetEnvVar("HSA_ENABLE_SRAMECC"); + sramecc_enable_ = + (var == "0") ? SRAMECC_DISABLED : ((var == "1") ? SRAMECC_ENABLED : SRAMECC_DEFAULT); + var = os::GetEnvVar("HSA_IMAGE_PRINT_SRD"); image_print_srd_ = (var == "1") ? true : false; @@ -269,6 +274,8 @@ class Flag { const std::string& svm_profile() const { return svm_profile_; } + SRAMECC_ENABLE sramecc_enable() const { return sramecc_enable_; } + private: bool check_flat_scratch_; bool enable_vm_fault_message_; @@ -312,6 +319,8 @@ class Flag { // Indicates user preference for Xnack state. XNACK_REQUEST xnack_; + SRAMECC_ENABLE sramecc_enable_; + // Map GPU index post RVD to its default cu mask. std::map<uint32_t, std::vector<uint32_t>> cu_mask_;