Add environment flag to enable sdma workaround that will wait for the sdma queue to be idle before updating the write pointer. Add class to manage environment flags.
[git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1254004]
[ROCm/ROCR-Runtime commit: c95f96a9e4]
Этот коммит содержится в:
@@ -54,9 +54,10 @@
|
||||
#include "core/inc/agent.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
#include "core/inc/signal.h"
|
||||
#include "core/util/utils.h"
|
||||
#include "core/util/flag.h"
|
||||
#include "core/util/locks.h"
|
||||
#include "core/util/os.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
#include "core/inc/amd_loader_context.hpp"
|
||||
#include "amd_hsa_code.hpp"
|
||||
@@ -301,6 +302,8 @@ class Runtime {
|
||||
return system_deallocator_;
|
||||
}
|
||||
|
||||
const Flag& flag() const { return flag_; }
|
||||
|
||||
ExtensionEntryPoints extensions_;
|
||||
|
||||
protected:
|
||||
@@ -488,6 +491,9 @@ class Runtime {
|
||||
// Holds reference count to runtime object.
|
||||
volatile uint32_t ref_count_;
|
||||
|
||||
// Track environment variables.
|
||||
Flag flag_;
|
||||
|
||||
// Frees runtime memory when the runtime library is unloaded if safe to do so.
|
||||
// Failure to release the runtime indicates an incorrect application but is
|
||||
// common (example: calls library routines at process exit).
|
||||
|
||||
@@ -274,7 +274,7 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id,
|
||||
assert(amd_queue_.group_segment_aperture_base_hi != NULL &&
|
||||
"No group region found.");
|
||||
|
||||
if (os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH") == "1") {
|
||||
if (core::Runtime::runtime_singleton_->flag().check_flat_scratch()) {
|
||||
assert(amd_queue_.private_segment_aperture_base_hi != NULL &&
|
||||
"No private region found.");
|
||||
}
|
||||
|
||||
@@ -676,10 +676,25 @@ void BlitSdma::UpdateWriteAndDoorbellRegister(uint32_t current_offset,
|
||||
// Otherwise the CP may read invalid packets.
|
||||
if (atomic::Load(&cached_commit_offset_, std::memory_order_acquire) ==
|
||||
current_offset) {
|
||||
if (core::Runtime::runtime_singleton_->flag().sdma_wait_idle()) {
|
||||
// TODO(bwicakso): remove when sdma wpointer issue is resolved.
|
||||
// Wait until the SDMA engine finishes processing all packets before
|
||||
// updating the wptr and doorbell.
|
||||
while (atomic::Load(queue_resource_.Queue_read_ptr,
|
||||
std::memory_order_acquire) != current_offset) {
|
||||
os::YieldThread();
|
||||
}
|
||||
}
|
||||
|
||||
// Update write pointer and doorbell register.
|
||||
atomic::Store(queue_resource_.Queue_write_ptr, new_offset);
|
||||
atomic::Store(queue_resource_.Queue_DoorBell, new_offset,
|
||||
std::memory_order_release);
|
||||
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
|
||||
atomic::Store(queue_resource_.Queue_DoorBell, new_offset);
|
||||
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
|
||||
atomic::Store(&cached_commit_offset_, new_offset);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -96,8 +96,7 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
|
||||
? HSA_AMD_COHERENCY_TYPE_COHERENT
|
||||
: HSA_AMD_COHERENCY_TYPE_NONCOHERENT);
|
||||
|
||||
max_queues_ =
|
||||
static_cast<uint32_t>(atoi(os::GetEnvVar("HSA_MAX_QUEUES").c_str()));
|
||||
max_queues_ = core::Runtime::runtime_singleton_->flag().max_queues();
|
||||
#if !defined(HSA_LARGE_MODEL) || !defined(__linux__)
|
||||
if (max_queues_ == 0) {
|
||||
max_queues_ = 10;
|
||||
@@ -249,7 +248,8 @@ void GpuAgent::InitScratchPool() {
|
||||
flags.ui32.Scratch = 1;
|
||||
flags.ui32.HostAccess = 1;
|
||||
|
||||
scratch_per_thread_ = atoi(os::GetEnvVar("HSA_SCRATCH_MEM").c_str());
|
||||
scratch_per_thread_ =
|
||||
core::Runtime::runtime_singleton_->flag().scratch_mem_size();
|
||||
if (scratch_per_thread_ == 0)
|
||||
scratch_per_thread_ = DEFAULT_SCRATCH_BYTES_PER_THREAD;
|
||||
|
||||
@@ -388,10 +388,9 @@ core::Blit* GpuAgent::CreateBlitKernel() {
|
||||
|
||||
hsa_status_t GpuAgent::InitDma() {
|
||||
// Try create SDMA blit first.
|
||||
std::string sdma_enable = os::GetEnvVar("HSA_ENABLE_SDMA");
|
||||
|
||||
if (sdma_enable != "0" && isa_->GetMajorVersion() == 8 &&
|
||||
isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 3) {
|
||||
if (core::Runtime::runtime_singleton_->flag().enable_sdma() &&
|
||||
isa_->GetMajorVersion() == 8 && isa_->GetMinorVersion() == 0 &&
|
||||
isa_->GetStepping() == 3) {
|
||||
blit_h2d_ = CreateBlitSdma();
|
||||
blit_d2h_ = CreateBlitSdma();
|
||||
|
||||
|
||||
+3
-2
@@ -77,8 +77,9 @@ bool IsDebuggerRegistered()
|
||||
{
|
||||
return false;
|
||||
// Leaving code commented as it will be used later on
|
||||
// return (("1" == os::GetEnvVar("HSA_EMULATE_AQL")) &&
|
||||
// (0 != os::GetEnvVar("HSA_TOOLS_LIB").size()));
|
||||
//return ((core::Runtime::runtime_singleton_->flag().emulate_aql()) &&
|
||||
// (0 !=
|
||||
// core::Runtime::runtime_singleton_->flag().tools_lib_names().size()));
|
||||
}
|
||||
|
||||
class SegmentMemory {
|
||||
|
||||
@@ -194,7 +194,7 @@ void ExtensionEntryPoints::Unload() {
|
||||
}
|
||||
// Due to valgrind bug, runtime cannot dlclose extensions see:
|
||||
// http://valgrind.org/docs/manual/faq.html#faq.unhelpful
|
||||
if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") {
|
||||
if (!core::Runtime::runtime_singleton_->flag().running_valgrind()) {
|
||||
for (int i = 0; i < libs_.size(); i++) {
|
||||
os::CloseLib(libs_[i]);
|
||||
}
|
||||
|
||||
@@ -726,8 +726,7 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string print_vm_message = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE");
|
||||
if (print_vm_message == "1") {
|
||||
if (runtime_singleton_->flag().enable_vm_fault_message()) {
|
||||
HsaEvent* vm_fault_event = vm_fault_signal->EopEvent();
|
||||
|
||||
const HsaMemoryAccessFault& fault =
|
||||
@@ -744,6 +743,8 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
|
||||
reason += "Host access only";
|
||||
} else if (fault.Failure.ECC == 1) {
|
||||
reason += "ECC failure (if supported by HW)";
|
||||
} else {
|
||||
reason += "Unknown";
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
@@ -778,9 +779,9 @@ Runtime::Runtime()
|
||||
}
|
||||
|
||||
void Runtime::Load() {
|
||||
// Load interrupt enable option
|
||||
std::string interrupt = os::GetEnvVar("HSA_ENABLE_INTERRUPT");
|
||||
g_use_interrupt_wait = (interrupt != "0");
|
||||
flag_.Refresh();
|
||||
|
||||
g_use_interrupt_wait = flag_.enable_interrupt();
|
||||
|
||||
if (!amd::Load()) {
|
||||
return;
|
||||
@@ -893,7 +894,7 @@ void Runtime::LoadTools() {
|
||||
hsa_api_table_.LinkExts(&extensions_.table);
|
||||
|
||||
// Load tool libs
|
||||
std::string tool_names = os::GetEnvVar("HSA_TOOLS_LIB");
|
||||
std::string tool_names = flag_.tools_lib_names();
|
||||
if (tool_names != "") {
|
||||
std::vector<std::string> names = parse_tool_names(tool_names);
|
||||
std::vector<const char*> failed;
|
||||
@@ -954,7 +955,7 @@ void Runtime::UnloadTools() {
|
||||
void Runtime::CloseTools() {
|
||||
// Due to valgrind bug, runtime cannot dlclose extensions see:
|
||||
// http://valgrind.org/docs/manual/faq.html#faq.unhelpful
|
||||
if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") {
|
||||
if (!flag_.running_valgrind()) {
|
||||
for (int i = 0; i < tool_libs_.size(); i++) os::CloseLib(tool_libs_[i]);
|
||||
}
|
||||
tool_libs_.clear();
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HSA_RUNTIME_CORE_INC_FLAG_H_
|
||||
#define HSA_RUNTIME_CORE_INC_FLAG_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "core/util/os.h"
|
||||
#include "core/util/utils.h"
|
||||
|
||||
class Flag {
|
||||
public:
|
||||
explicit Flag() { Refresh(); }
|
||||
|
||||
virtual ~Flag() {}
|
||||
|
||||
void Refresh() {
|
||||
std::string var = os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH");
|
||||
check_flat_scratch_ = (var == "1") ? true : false;
|
||||
|
||||
var = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE");
|
||||
enable_vm_fault_message_ = (var == "1") ? true : false;
|
||||
|
||||
var = os::GetEnvVar("HSA_ENABLE_INTERRUPT");
|
||||
enable_interrupt_ = (var == "0") ? false : true;
|
||||
|
||||
var = os::GetEnvVar("HSA_ENABLE_SDMA");
|
||||
enable_sdma_ = (var == "0") ? false : true;
|
||||
|
||||
var = os::GetEnvVar("HSA_EMULATE_AQL");
|
||||
emulate_aql_ = (var == "1") ? true : false;
|
||||
|
||||
var = os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND");
|
||||
running_valgrind_ = (var == "1") ? true : false;
|
||||
|
||||
var = os::GetEnvVar("HSA_SDMA_WAIT_IDLE");
|
||||
sdma_wait_idle_ = (var == "1") ? true : false;
|
||||
|
||||
var = os::GetEnvVar("HSA_MAX_QUEUES");
|
||||
max_queues_ = static_cast<uint32_t>(atoi(var.c_str()));
|
||||
|
||||
var = os::GetEnvVar("HSA_SCRATCH_MEM");
|
||||
scratch_mem_size_ = atoi(var.c_str());
|
||||
|
||||
tools_lib_names_ = os::GetEnvVar("HSA_TOOLS_LIB");
|
||||
}
|
||||
|
||||
bool check_flat_scratch() const { return check_flat_scratch_; }
|
||||
|
||||
bool enable_vm_fault_message() const { return enable_vm_fault_message_; }
|
||||
|
||||
bool enable_interrupt() const { return enable_interrupt_; }
|
||||
|
||||
bool enable_sdma() const { return enable_sdma_; }
|
||||
|
||||
bool emulate_aql() const { return emulate_aql_; }
|
||||
|
||||
bool running_valgrind() const { return running_valgrind_; }
|
||||
|
||||
bool sdma_wait_idle() const { return sdma_wait_idle_; }
|
||||
|
||||
uint32_t max_queues() const { return max_queues_; }
|
||||
|
||||
size_t scratch_mem_size() const { return scratch_mem_size_; }
|
||||
|
||||
std::string tools_lib_names() const { return tools_lib_names_; }
|
||||
|
||||
private:
|
||||
bool check_flat_scratch_;
|
||||
bool enable_vm_fault_message_;
|
||||
bool enable_interrupt_;
|
||||
bool enable_sdma_;
|
||||
bool emulate_aql_;
|
||||
bool running_valgrind_;
|
||||
bool sdma_wait_idle_;
|
||||
|
||||
uint32_t max_queues_;
|
||||
|
||||
size_t scratch_mem_size_;
|
||||
|
||||
std::string tools_lib_names_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(Flag);
|
||||
};
|
||||
|
||||
#endif // header guard
|
||||
Ссылка в новой задаче
Block a user