From 2caa405817e422dc64a4d4e1a2a3e18b101aea2b Mon Sep 17 00:00:00 2001 From: "Besar Wicaksono (xN/A) TX [TEXT]" Date: Fri, 1 Apr 2016 17:13:45 -0500 Subject: [PATCH] Add environment flag to enable sdma workaround that will wait for the sdma queue to be idle before updating the write pointer. Add class to manage environment flags. [git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1254004] [ROCm/ROCR-Runtime commit: c95f96a9e4132ac35669d92cb80f10472194291d] --- .../runtime/hsa-runtime/core/inc/runtime.h | 8 +- .../core/runtime/amd_aql_queue.cpp | 2 +- .../core/runtime/amd_blit_sdma.cpp | 19 ++- .../core/runtime/amd_gpu_agent.cpp | 13 +- .../core/runtime/amd_loader_context.cpp | 5 +- .../core/runtime/hsa_ext_interface.cpp | 2 +- .../hsa-runtime/core/runtime/runtime.cpp | 15 +- .../runtime/hsa-runtime/core/util/flag.h | 128 ++++++++++++++++++ 8 files changed, 171 insertions(+), 21 deletions(-) create mode 100644 projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index c59a6ee0d2..1f6d68b72c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -54,9 +54,10 @@ #include "core/inc/agent.h" #include "core/inc/memory_region.h" #include "core/inc/signal.h" -#include "core/util/utils.h" +#include "core/util/flag.h" #include "core/util/locks.h" #include "core/util/os.h" +#include "core/util/utils.h" #include "core/inc/amd_loader_context.hpp" #include "amd_hsa_code.hpp" @@ -301,6 +302,8 @@ class Runtime { return system_deallocator_; } + const Flag& flag() const { return flag_; } + ExtensionEntryPoints extensions_; protected: @@ -488,6 +491,9 @@ class Runtime { // Holds reference count to runtime object. volatile uint32_t ref_count_; + // Track environment variables. 
+ Flag flag_; + // Frees runtime memory when the runtime library is unloaded if safe to do so. // Failure to release the runtime indicates an incorrect application but is // common (example: calls library routines at process exit). diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 870410a69c..9bfa780454 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -274,7 +274,7 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, assert(amd_queue_.group_segment_aperture_base_hi != NULL && "No group region found."); - if (os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH") == "1") { + if (core::Runtime::runtime_singleton_->flag().check_flat_scratch()) { assert(amd_queue_.private_segment_aperture_base_hi != NULL && "No private region found."); } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp index 03f9792a72..e4cc8eff07 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp @@ -676,10 +676,25 @@ void BlitSdma::UpdateWriteAndDoorbellRegister(uint32_t current_offset, // Otherwise the CP may read invalid packets. if (atomic::Load(&cached_commit_offset_, std::memory_order_acquire) == current_offset) { + if (core::Runtime::runtime_singleton_->flag().sdma_wait_idle()) { + // TODO(bwicakso): remove when sdma wpointer issue is resolved. + // Wait until the SDMA engine finish processing all packets before + // updating the wptr and doorbell. + while (atomic::Load(queue_resource_.Queue_read_ptr, + std::memory_order_acquire) != current_offset) { + os::YieldThread(); + } + } + // Update write pointer and doorbel register. 
atomic::Store(queue_resource_.Queue_write_ptr, new_offset); - atomic::Store(queue_resource_.Queue_DoorBell, new_offset, - std::memory_order_release); + + std::atomic_thread_fence(std::memory_order_release); + + atomic::Store(queue_resource_.Queue_DoorBell, new_offset); + + std::atomic_thread_fence(std::memory_order_release); + atomic::Store(&cached_commit_offset_, new_offset); break; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index e56be83aff..ae4f5deb8d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -96,8 +96,7 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props) ? HSA_AMD_COHERENCY_TYPE_COHERENT : HSA_AMD_COHERENCY_TYPE_NONCOHERENT); - max_queues_ = - static_cast(atoi(os::GetEnvVar("HSA_MAX_QUEUES").c_str())); + max_queues_ = core::Runtime::runtime_singleton_->flag().max_queues(); #if !defined(HSA_LARGE_MODEL) || !defined(__linux__) if (max_queues_ == 0) { max_queues_ = 10; @@ -249,7 +248,8 @@ void GpuAgent::InitScratchPool() { flags.ui32.Scratch = 1; flags.ui32.HostAccess = 1; - scratch_per_thread_ = atoi(os::GetEnvVar("HSA_SCRATCH_MEM").c_str()); + scratch_per_thread_ = + core::Runtime::runtime_singleton_->flag().scratch_mem_size(); if (scratch_per_thread_ == 0) scratch_per_thread_ = DEFAULT_SCRATCH_BYTES_PER_THREAD; @@ -388,10 +388,9 @@ core::Blit* GpuAgent::CreateBlitKernel() { hsa_status_t GpuAgent::InitDma() { // Try create SDMA blit first. 
- std::string sdma_enable = os::GetEnvVar("HSA_ENABLE_SDMA"); - - if (sdma_enable != "0" && isa_->GetMajorVersion() == 8 && - isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 3) { + if (core::Runtime::runtime_singleton_->flag().enable_sdma() && + isa_->GetMajorVersion() == 8 && isa_->GetMinorVersion() == 0 && + isa_->GetStepping() == 3) { blit_h2d_ = CreateBlitSdma(); blit_d2h_ = CreateBlitSdma(); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp index 5d5f44c718..b38a25ad4b 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp @@ -77,8 +77,9 @@ bool IsDebuggerRegistered() { return false; // Leaving code commented as it will be used later on - // return (("1" == os::GetEnvVar("HSA_EMULATE_AQL")) && - // (0 != os::GetEnvVar("HSA_TOOLS_LIB").size())); + //return ((core::Runtime::runtime_singleton_->flag().emulate_aql()) && + // (0 != + // core::Runtime::runtime_singleton_->flag().tools_lib_names().size())); } class SegmentMemory { diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp index 454e004eab..3aa9f5c04e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_interface.cpp @@ -194,7 +194,7 @@ void ExtensionEntryPoints::Unload() { } // Due to valgrind bug, runtime cannot dlclose extensions see: // http://valgrind.org/docs/manual/faq.html#faq.unhelpful - if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") { + if (!core::Runtime::runtime_singleton_->flag().running_valgrind()) { for (int i = 0; i < libs_.size(); i++) { os::CloseLib(libs_[i]); } diff --git 
a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 27fe49aa5a..80a78154be 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -726,8 +726,7 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) { return false; } - std::string print_vm_message = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE"); - if (print_vm_message == "1") { + if (runtime_singleton_->flag().enable_vm_fault_message()) { HsaEvent* vm_fault_event = vm_fault_signal->EopEvent(); const HsaMemoryAccessFault& fault = @@ -744,6 +743,8 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) { reason += "Host access only"; } else if (fault.Failure.ECC == 1) { reason += "ECC failure (if supported by HW)"; + } else { + reason += "Unknown"; } fprintf(stderr, @@ -778,9 +779,9 @@ Runtime::Runtime() } void Runtime::Load() { - // Load interrupt enable option - std::string interrupt = os::GetEnvVar("HSA_ENABLE_INTERRUPT"); - g_use_interrupt_wait = (interrupt != "0"); + flag_.Refresh(); + + g_use_interrupt_wait = flag_.enable_interrupt(); if (!amd::Load()) { return; @@ -893,7 +894,7 @@ void Runtime::LoadTools() { hsa_api_table_.LinkExts(&extensions_.table); // Load tool libs - std::string tool_names = os::GetEnvVar("HSA_TOOLS_LIB"); + std::string tool_names = flag_.tools_lib_names(); if (tool_names != "") { std::vector names = parse_tool_names(tool_names); std::vector failed; @@ -954,7 +955,7 @@ void Runtime::UnloadTools() { void Runtime::CloseTools() { // Due to valgrind bug, runtime cannot dlclose extensions see: // http://valgrind.org/docs/manual/faq.html#faq.unhelpful - if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") { + if (!flag_.running_valgrind()) { for (int i = 0; i < tool_libs_.size(); i++) os::CloseLib(tool_libs_[i]); } tool_libs_.clear(); diff --git 
a/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h
new file mode 100644
index 0000000000..9237a2dffe
--- /dev/null
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/flag.h
@@ -0,0 +1,144 @@
+////////////////////////////////////////////////////////////////////////////////
+//
+// The University of Illinois/NCSA
+// Open Source License (NCSA)
+//
+// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
+//
+// Developed by:
+//
+// AMD Research and AMD HSA Software Development
+//
+// Advanced Micro Devices, Inc.
+//
+// www.amd.com
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal with the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimers.
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimers in
+// the documentation and/or other materials provided with the distribution.
+// - Neither the names of Advanced Micro Devices, Inc,
+// nor the names of its contributors may be used to endorse or promote
+// products derived from this Software without specific prior written
+// permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS WITH THE SOFTWARE.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef HSA_RUNTIME_CORE_UTIL_FLAG_H_
+#define HSA_RUNTIME_CORE_UTIL_FLAG_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <string>
+
+#include "core/util/os.h"
+#include "core/util/utils.h"
+
+// Snapshot of the HSA_* environment variables that configure the runtime.
+//
+// Refresh() re-reads every supported variable through os::GetEnvVar(); the
+// accessors return whatever the most recent Refresh() captured. The
+// constructor performs an initial Refresh(), so an instance is usable
+// immediately after construction.
+class Flag {
+ public:
+  explicit Flag() { Refresh(); }
+
+  virtual ~Flag() {}
+
+  // Re-reads all supported environment variables into this object.
+  void Refresh() {
+    // Opt-in switches: on only when the variable is exactly "1".
+    check_flat_scratch_ = EnvEquals("HSA_CHECK_FLAT_SCRATCH", "1");
+    enable_vm_fault_message_ = EnvEquals("HSA_ENABLE_VM_FAULT_MESSAGE", "1");
+    emulate_aql_ = EnvEquals("HSA_EMULATE_AQL", "1");
+    running_valgrind_ = EnvEquals("HSA_RUNNING_UNDER_VALGRIND", "1");
+    sdma_wait_idle_ = EnvEquals("HSA_SDMA_WAIT_IDLE", "1");
+
+    // Opt-out switches: on unless the variable is exactly "0".
+    enable_interrupt_ = !EnvEquals("HSA_ENABLE_INTERRUPT", "0");
+    enable_sdma_ = !EnvEquals("HSA_ENABLE_SDMA", "0");
+
+    // Numeric settings: 0 means "no usable value"; EnvAsUnsigned() maps
+    // missing, malformed and negative input to 0 and clamps oversized input.
+    max_queues_ = static_cast<uint32_t>(
+        EnvAsUnsigned("HSA_MAX_QUEUES", ~static_cast<uint32_t>(0)));
+    scratch_mem_size_ = static_cast<size_t>(
+        EnvAsUnsigned("HSA_SCRATCH_MEM", ~static_cast<size_t>(0)));
+
+    tools_lib_names_ = os::GetEnvVar("HSA_TOOLS_LIB");
+  }
+
+  bool check_flat_scratch() const { return check_flat_scratch_; }
+
+  bool enable_vm_fault_message() const { return enable_vm_fault_message_; }
+
+  bool enable_interrupt() const { return enable_interrupt_; }
+
+  bool enable_sdma() const { return enable_sdma_; }
+
+  bool emulate_aql() const { return emulate_aql_; }
+
+  bool running_valgrind() const { return running_valgrind_; }
+
+  bool sdma_wait_idle() const { return sdma_wait_idle_; }
+
+  uint32_t max_queues() const { return max_queues_; }
+
+  size_t scratch_mem_size() const { return scratch_mem_size_; }
+
+  std::string tools_lib_names() const { return tools_lib_names_; }
+
+ private:
+  // Returns true when environment variable |name| is exactly |expected|.
+  static bool EnvEquals(const char* name, const char* expected) {
+    return os::GetEnvVar(name) == expected;
+  }
+
+  // Parses the leading base-10 integer of environment variable |name|.
+  // Returns 0 when there is no leading number (e.g. an empty string) or the
+  // number is not positive, and |limit| when the number exceeds |limit|.
+  static unsigned long long EnvAsUnsigned(const char* name,
+                                          unsigned long long limit) {
+    const std::string text = os::GetEnvVar(name);
+    const long long parsed = strtoll(text.c_str(), NULL, 10);
+    if (parsed <= 0) return 0;
+    const unsigned long long value = static_cast<unsigned long long>(parsed);
+    return (value > limit) ? limit : value;
+  }
+
+  bool check_flat_scratch_;
+  bool enable_vm_fault_message_;
+  bool enable_interrupt_;
+  bool enable_sdma_;
+  bool emulate_aql_;
+  bool running_valgrind_;
+  bool sdma_wait_idle_;
+
+  uint32_t max_queues_;
+
+  size_t scratch_mem_size_;
+
+  std::string tools_lib_names_;
+
+  DISALLOW_COPY_AND_ASSIGN(Flag);
+};
+
+#endif  // header guard