Add an environment flag (HSA_SDMA_WAIT_IDLE) to enable an SDMA workaround that waits for the SDMA queue to be idle before updating the write pointer. Add a class to manage environment flags.

[git-p4: depot-paths = "//depot/stg/hsa/drivers/hsa/runtime/": change = 1254004]


[ROCm/ROCR-Runtime commit: c95f96a9e4]
Этот коммит содержится в:
Besar Wicaksono (xN/A) TX [TEXT]
2016-04-01 17:13:45 -05:00
родитель fae50d66a8
Коммит 2caa405817
8 изменённых файлов: 171 добавлений и 21 удалений
+7 -1
Просмотреть файл
@@ -54,9 +54,10 @@
#include "core/inc/agent.h"
#include "core/inc/memory_region.h"
#include "core/inc/signal.h"
#include "core/util/utils.h"
#include "core/util/flag.h"
#include "core/util/locks.h"
#include "core/util/os.h"
#include "core/util/utils.h"
#include "core/inc/amd_loader_context.hpp"
#include "amd_hsa_code.hpp"
@@ -301,6 +302,8 @@ class Runtime {
return system_deallocator_;
}
const Flag& flag() const { return flag_; }
ExtensionEntryPoints extensions_;
protected:
@@ -488,6 +491,9 @@ class Runtime {
// Holds reference count to runtime object.
volatile uint32_t ref_count_;
// Track environment variables.
Flag flag_;
// Frees runtime memory when the runtime library is unloaded if safe to do so.
// Failure to release the runtime indicates an incorrect application but is
// common (example: calls library routines at process exit).
+1 -1
Просмотреть файл
@@ -274,7 +274,7 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id,
assert(amd_queue_.group_segment_aperture_base_hi != NULL &&
"No group region found.");
if (os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH") == "1") {
if (core::Runtime::runtime_singleton_->flag().check_flat_scratch()) {
assert(amd_queue_.private_segment_aperture_base_hi != NULL &&
"No private region found.");
}
+17 -2
Просмотреть файл
@@ -676,10 +676,25 @@ void BlitSdma::UpdateWriteAndDoorbellRegister(uint32_t current_offset,
// Otherwise the CP may read invalid packets.
if (atomic::Load(&cached_commit_offset_, std::memory_order_acquire) ==
current_offset) {
if (core::Runtime::runtime_singleton_->flag().sdma_wait_idle()) {
// TODO(bwicakso): remove when sdma wpointer issue is resolved.
// Wait until the SDMA engine finishes processing all packets before
// updating the wptr and doorbell.
while (atomic::Load(queue_resource_.Queue_read_ptr,
std::memory_order_acquire) != current_offset) {
os::YieldThread();
}
}
// Update write pointer and doorbell register.
atomic::Store(queue_resource_.Queue_write_ptr, new_offset);
atomic::Store(queue_resource_.Queue_DoorBell, new_offset,
std::memory_order_release);
std::atomic_thread_fence(std::memory_order_release);
atomic::Store(queue_resource_.Queue_DoorBell, new_offset);
std::atomic_thread_fence(std::memory_order_release);
atomic::Store(&cached_commit_offset_, new_offset);
break;
}
+6 -7
Просмотреть файл
@@ -96,8 +96,7 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
? HSA_AMD_COHERENCY_TYPE_COHERENT
: HSA_AMD_COHERENCY_TYPE_NONCOHERENT);
max_queues_ =
static_cast<uint32_t>(atoi(os::GetEnvVar("HSA_MAX_QUEUES").c_str()));
max_queues_ = core::Runtime::runtime_singleton_->flag().max_queues();
#if !defined(HSA_LARGE_MODEL) || !defined(__linux__)
if (max_queues_ == 0) {
max_queues_ = 10;
@@ -249,7 +248,8 @@ void GpuAgent::InitScratchPool() {
flags.ui32.Scratch = 1;
flags.ui32.HostAccess = 1;
scratch_per_thread_ = atoi(os::GetEnvVar("HSA_SCRATCH_MEM").c_str());
scratch_per_thread_ =
core::Runtime::runtime_singleton_->flag().scratch_mem_size();
if (scratch_per_thread_ == 0)
scratch_per_thread_ = DEFAULT_SCRATCH_BYTES_PER_THREAD;
@@ -388,10 +388,9 @@ core::Blit* GpuAgent::CreateBlitKernel() {
hsa_status_t GpuAgent::InitDma() {
// Try create SDMA blit first.
std::string sdma_enable = os::GetEnvVar("HSA_ENABLE_SDMA");
if (sdma_enable != "0" && isa_->GetMajorVersion() == 8 &&
isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 3) {
if (core::Runtime::runtime_singleton_->flag().enable_sdma() &&
isa_->GetMajorVersion() == 8 && isa_->GetMinorVersion() == 0 &&
isa_->GetStepping() == 3) {
blit_h2d_ = CreateBlitSdma();
blit_d2h_ = CreateBlitSdma();
+3 -2
Просмотреть файл
@@ -77,8 +77,9 @@ bool IsDebuggerRegistered()
{
return false;
// Leaving code commented as it will be used later on
// return (("1" == os::GetEnvVar("HSA_EMULATE_AQL")) &&
// (0 != os::GetEnvVar("HSA_TOOLS_LIB").size()));
//return ((core::Runtime::runtime_singleton_->flag().emulate_aql()) &&
// (0 !=
// core::Runtime::runtime_singleton_->flag().tools_lib_names().size()));
}
class SegmentMemory {
+1 -1
Просмотреть файл
@@ -194,7 +194,7 @@ void ExtensionEntryPoints::Unload() {
}
// Due to valgrind bug, runtime cannot dlclose extensions see:
// http://valgrind.org/docs/manual/faq.html#faq.unhelpful
if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") {
if (!core::Runtime::runtime_singleton_->flag().running_valgrind()) {
for (int i = 0; i < libs_.size(); i++) {
os::CloseLib(libs_[i]);
}
+8 -7
Просмотреть файл
@@ -726,8 +726,7 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
return false;
}
std::string print_vm_message = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE");
if (print_vm_message == "1") {
if (runtime_singleton_->flag().enable_vm_fault_message()) {
HsaEvent* vm_fault_event = vm_fault_signal->EopEvent();
const HsaMemoryAccessFault& fault =
@@ -744,6 +743,8 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
reason += "Host access only";
} else if (fault.Failure.ECC == 1) {
reason += "ECC failure (if supported by HW)";
} else {
reason += "Unknown";
}
fprintf(stderr,
@@ -778,9 +779,9 @@ Runtime::Runtime()
}
void Runtime::Load() {
// Load interrupt enable option
std::string interrupt = os::GetEnvVar("HSA_ENABLE_INTERRUPT");
g_use_interrupt_wait = (interrupt != "0");
flag_.Refresh();
g_use_interrupt_wait = flag_.enable_interrupt();
if (!amd::Load()) {
return;
@@ -893,7 +894,7 @@ void Runtime::LoadTools() {
hsa_api_table_.LinkExts(&extensions_.table);
// Load tool libs
std::string tool_names = os::GetEnvVar("HSA_TOOLS_LIB");
std::string tool_names = flag_.tools_lib_names();
if (tool_names != "") {
std::vector<std::string> names = parse_tool_names(tool_names);
std::vector<const char*> failed;
@@ -954,7 +955,7 @@ void Runtime::UnloadTools() {
void Runtime::CloseTools() {
// Due to valgrind bug, runtime cannot dlclose extensions see:
// http://valgrind.org/docs/manual/faq.html#faq.unhelpful
if (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") != "1") {
if (!flag_.running_valgrind()) {
for (int i = 0; i < tool_libs_.size(); i++) os::CloseLib(tool_libs_[i]);
}
tool_libs_.clear();
+128
Просмотреть файл
@@ -0,0 +1,128 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_CORE_INC_FLAG_H_
#define HSA_RUNTIME_CORE_INC_FLAG_H_
#include <stdint.h>
#include <stdlib.h>
#include <string>
#include "core/util/os.h"
#include "core/util/utils.h"
// Snapshot of the HSA_* environment variables consulted by the runtime.
//
// The environment is read when the object is constructed and again on every
// call to Refresh(); the accessors only return the cached values, so they
// never touch the process environment themselves.
class Flag {
 public:
  // Captures the current environment.
  explicit Flag() { Refresh(); }

  virtual ~Flag() {}

  // Re-reads every tracked environment variable and overwrites the cached
  // values.
  void Refresh() {
    // Opt-in switches: enabled only when the variable is exactly "1".
    check_flat_scratch_ = (os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH") == "1");
    enable_vm_fault_message_ =
        (os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE") == "1");
    emulate_aql_ = (os::GetEnvVar("HSA_EMULATE_AQL") == "1");
    running_valgrind_ = (os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND") == "1");
    sdma_wait_idle_ = (os::GetEnvVar("HSA_SDMA_WAIT_IDLE") == "1");

    // Opt-out switches: enabled unless the variable is exactly "0" (an unset
    // variable therefore leaves the feature on).
    enable_interrupt_ = (os::GetEnvVar("HSA_ENABLE_INTERRUPT") != "0");
    enable_sdma_ = (os::GetEnvVar("HSA_ENABLE_SDMA") != "0");

    // Numeric settings: 0 means "not specified". static_cast<T>(-1) is the
    // largest value representable by the unsigned type T.
    max_queues_ = static_cast<uint32_t>(ParseUnsigned(
        os::GetEnvVar("HSA_MAX_QUEUES"), static_cast<uint32_t>(-1)));
    scratch_mem_size_ = static_cast<size_t>(ParseUnsigned(
        os::GetEnvVar("HSA_SCRATCH_MEM"), static_cast<size_t>(-1)));

    tools_lib_names_ = os::GetEnvVar("HSA_TOOLS_LIB");
  }

  // True when HSA_CHECK_FLAT_SCRATCH is "1".
  bool check_flat_scratch() const { return check_flat_scratch_; }

  // True when HSA_ENABLE_VM_FAULT_MESSAGE is "1".
  bool enable_vm_fault_message() const { return enable_vm_fault_message_; }

  // True unless HSA_ENABLE_INTERRUPT is "0".
  bool enable_interrupt() const { return enable_interrupt_; }

  // True unless HSA_ENABLE_SDMA is "0".
  bool enable_sdma() const { return enable_sdma_; }

  // True when HSA_EMULATE_AQL is "1".
  bool emulate_aql() const { return emulate_aql_; }

  // True when HSA_RUNNING_UNDER_VALGRIND is "1".
  bool running_valgrind() const { return running_valgrind_; }

  // True when HSA_SDMA_WAIT_IDLE is "1".
  bool sdma_wait_idle() const { return sdma_wait_idle_; }

  // Value of HSA_MAX_QUEUES; 0 when unset, non-numeric, or not positive.
  uint32_t max_queues() const { return max_queues_; }

  // Value of HSA_SCRATCH_MEM; 0 when unset, non-numeric, or not positive.
  size_t scratch_mem_size() const { return scratch_mem_size_; }

  // Raw contents of HSA_TOOLS_LIB (empty when unset).
  std::string tools_lib_names() const { return tools_lib_names_; }

 private:
  // Parses the leading base-10 integer of |text|.
  //
  // Returns 0 when |text| is empty, does not start with a number, or holds a
  // value that is zero or negative. Values larger than |limit| are clamped to
  // |limit|. Unlike atoi(), out-of-range input is well defined: strtoll()
  // saturates instead of invoking undefined behaviour, and a negative value
  // can no longer wrap around to a huge unsigned number.
  static uint64_t ParseUnsigned(const std::string& text, uint64_t limit) {
    const long long parsed = strtoll(text.c_str(), NULL, 10);
    if (parsed <= 0) {
      return 0;
    }

    const uint64_t value = static_cast<uint64_t>(parsed);
    return (value > limit) ? limit : value;
  }

  bool check_flat_scratch_;
  bool enable_vm_fault_message_;
  bool enable_interrupt_;
  bool enable_sdma_;
  bool emulate_aql_;
  bool running_valgrind_;
  bool sdma_wait_idle_;

  uint32_t max_queues_;

  size_t scratch_mem_size_;

  std::string tools_lib_names_;

  DISALLOW_COPY_AND_ASSIGN(Flag);
};
#endif // header guard