da2607024b
On large BAR systems, for small-sized code-objects, we get performance using direct memcpy due to latencies when doing the blit-copy.
496 Zeilen
18 KiB
C++
496 Zeilen
18 KiB
C++
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// The University of Illinois/NCSA
|
|
// Open Source License (NCSA)
|
|
//
|
|
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Developed by:
|
|
//
|
|
// AMD Research and AMD HSA Software Development
|
|
//
|
|
// Advanced Micro Devices, Inc.
|
|
//
|
|
// www.amd.com
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to
|
|
// deal with the Software without restriction, including without limitation
|
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|
// Software is furnished to do so, subject to the following conditions:
|
|
//
|
|
// - Redistributions of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimers.
|
|
// - Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimers in
|
|
// the documentation and/or other materials provided with the distribution.
|
|
// - Neither the names of Advanced Micro Devices, Inc,
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
// products derived from this Software without specific prior written
|
|
// permission.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIESd OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
// DEALINGS WITH THE SOFTWARE.
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef HSA_RUNTIME_CORE_INC_FLAG_H_
|
|
#define HSA_RUNTIME_CORE_INC_FLAG_H_
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <vector>
|
|
#include <map>
|
|
#include <string>
|
|
|
|
#include "core/util/os.h"
|
|
#include "core/util/utils.h"
|
|
|
|
namespace rocr {
|
|
|
|
class Flag {
|
|
public:
|
|
enum SDMA_OVERRIDE { SDMA_DISABLE, SDMA_ENABLE, SDMA_DEFAULT };
|
|
enum SRAMECC_ENABLE { SRAMECC_DISABLED, SRAMECC_ENABLED, SRAMECC_DEFAULT };
|
|
|
|
// The values are meaningful and chosen to satisfy the thunk API.
|
|
enum XNACK_REQUEST { XNACK_DISABLE = 0, XNACK_ENABLE = 1, XNACK_UNCHANGED = 2 };
|
|
static_assert(XNACK_DISABLE == 0, "XNACK_REQUEST enum values improperly changed.");
|
|
static_assert(XNACK_ENABLE == 1, "XNACK_REQUEST enum values improperly changed.");
|
|
|
|
// Lift limit for 2.10 release RCCL workaround. This limit is not used when asynchronous scratch
|
|
// reclaim is supported
|
|
const size_t DEFAULT_SCRATCH_SINGLE_LIMIT = (140 * (1UL<<20)); // small_limit >> 2;
|
|
const size_t DEFAULT_SCRATCH_SINGLE_LIMIT_ASYNC_PER_XCC = (3 * (1UL<<30)); // 3 GB
|
|
const size_t DEFAULT_PCS_MAX_DEVICE_BUFFER_SIZE = (256 * (1UL<<20)); //256 MB
|
|
|
|
Flag() {}
|
|
|
|
virtual ~Flag() {}
|
|
|
|
void Refresh() {
|
|
std::string var = os::GetEnvVar("HSA_CHECK_FLAT_SCRATCH");
|
|
check_flat_scratch_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_VM_FAULT_MESSAGE");
|
|
enable_vm_fault_message_ = (var == "0") ? false : true;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_QUEUE_FAULT_MESSAGE");
|
|
enable_queue_fault_message_ = (var == "0") ? false : true;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_INTERRUPT");
|
|
enable_interrupt_ = (var == "0") ? false : true;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_SDMA");
|
|
enable_sdma_ = (var == "0") ? SDMA_DISABLE : ((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_PEER_SDMA");
|
|
enable_peer_sdma_ = (var == "0") ? SDMA_DISABLE : ((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_SDMA_GANG");
|
|
enable_sdma_gang_ = (var == "0") ? SDMA_DISABLE :
|
|
((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);
|
|
if (enable_sdma_ == SDMA_DISABLE) enable_sdma_gang_ = SDMA_DISABLE;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_SDMA_COPY_SIZE_OVERRIDE");
|
|
enable_sdma_copy_size_override_ = (var == "0") ? SDMA_DISABLE :
|
|
((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_SDMA_RECOMMENDED_ENG");
|
|
enable_sdma_recommended_eng_ = (var == "0") ? SDMA_DISABLE :
|
|
((var == "1") ? SDMA_ENABLE : SDMA_DEFAULT);
|
|
|
|
visible_gpus_ = os::GetEnvVar("ROCR_VISIBLE_DEVICES");
|
|
filter_visible_gpus_ = os::IsEnvVarSet("ROCR_VISIBLE_DEVICES");
|
|
|
|
var = os::GetEnvVar("HSA_RUNNING_UNDER_VALGRIND");
|
|
running_valgrind_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_SDMA_WAIT_IDLE");
|
|
sdma_wait_idle_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_MAX_QUEUES");
|
|
max_queues_ = static_cast<uint32_t>(atoi(var.c_str()));
|
|
|
|
// Maximum amount of scratch mem that can be used per process per gpu
|
|
var = os::GetEnvVar("HSA_SCRATCH_MEM");
|
|
scratch_mem_size_ = atoi(var.c_str());
|
|
|
|
// Scratch memory sizes > HSA_SCRATCH_SINGLE_LIMIT will trigger a use-once scheme
|
|
// We also reserve HSA_SCRATCH_SINGLE_LIMIT per process per gpu to guarrantee we
|
|
// have sufficient memory to for scratch in case user tried to allocate all device
|
|
// memory
|
|
if (os::IsEnvVarSet("HSA_SCRATCH_SINGLE_LIMIT")) {
|
|
var = os::GetEnvVar("HSA_SCRATCH_SINGLE_LIMIT");
|
|
char* end;
|
|
scratch_single_limit_ = strtoul(var.c_str(), &end, 10);
|
|
} else {
|
|
scratch_single_limit_ = DEFAULT_SCRATCH_SINGLE_LIMIT;
|
|
}
|
|
|
|
// On GPUs that support asynchronous scratch reclaim
|
|
// Scratch memory sizes > HSA_SCRATCH_SINGLE_LIMIT_ASYNC will trigger a use-once scheme
|
|
// Note: This only sets the initial value for the threshold. If
|
|
// hsa_amd_agent_set_async_scratch_limit is called after initialization, the threshold
|
|
// will be updated.
|
|
if (os::IsEnvVarSet("HSA_SCRATCH_SINGLE_LIMIT_ASYNC")) {
|
|
var = os::GetEnvVar("HSA_SCRATCH_SINGLE_LIMIT_ASYNC");
|
|
char* end;
|
|
scratch_single_limit_async_ = strtoul(var.c_str(), &end, 10);
|
|
} else {
|
|
scratch_single_limit_async_ = 0; // DEFAULT_SCRATCH_SINGLE_LIMIT_ASYNC_PER_XCC;
|
|
}
|
|
|
|
// On GPUs that support asynchronous scratch reclaim this can be used to disable this feature.
|
|
// Disabling asynchronous scratch reclaim also disables use of alternate scratch
|
|
// HSA_ENABLE_SCRATCH_ALT
|
|
var = os::GetEnvVar("HSA_ENABLE_SCRATCH_ASYNC_RECLAIM");
|
|
enable_scratch_async_reclaim_ = (var == "0") ? false : true;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_SCRATCH_ALT");
|
|
// Temporary: Completely disable alternate scratch because we need to update
|
|
// the debugger so that it can tell whether a dispatch is using alternate scratch
|
|
// instead of main scratch
|
|
// enable_scratch_alt_ = (var == "0") || !enable_scratch_async_reclaim_ ? false : true;
|
|
enable_scratch_alt_ = false;
|
|
|
|
tools_lib_names_ = os::GetEnvVar("HSA_TOOLS_LIB");
|
|
|
|
var = os::GetEnvVar("HSA_TOOLS_REPORT_LOAD_FAILURE");
|
|
|
|
ifdebug {
|
|
report_tool_load_failures_ = (var == "1") ? true : false;
|
|
} else {
|
|
report_tool_load_failures_ = (var == "0") ? false : true;
|
|
}
|
|
|
|
var = os::GetEnvVar("HSA_TOOLS_DISABLE_REGISTER");
|
|
disable_tool_register_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_TOOLS_REPORT_REGISTER_FAILURE");
|
|
report_tool_register_failures_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_DISABLE_FRAGMENT_ALLOCATOR");
|
|
disable_fragment_alloc_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_SDMA_HDP_FLUSH");
|
|
enable_sdma_hdp_flush_ = (var == "0") ? false : true;
|
|
|
|
var = os::GetEnvVar("HSA_REV_COPY_DIR");
|
|
rev_copy_dir_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_FORCE_FINE_GRAIN_PCIE");
|
|
fine_grain_pcie_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_NO_SCRATCH_RECLAIM");
|
|
no_scratch_reclaim_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_NO_SCRATCH_THREAD_LIMITER");
|
|
no_scratch_thread_limit_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_DISABLE_IMAGE");
|
|
disable_image_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_DISABLE_PC_SAMPLING");
|
|
disable_pc_sampling_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_LOADER_ENABLE_MMAP_URI");
|
|
loader_enable_mmap_uri_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_FORCE_SDMA_SIZE");
|
|
force_sdma_size_ = var.empty() ? 1024 * 1024 : atoi(var.c_str());
|
|
|
|
var = os::GetEnvVar("HSA_IGNORE_SRAMECC_MISREPORT");
|
|
check_sramecc_validity_ = (var == "1") ? false : true;
|
|
|
|
// Legal values are zero "0" or one "1". Any other value will
|
|
// be interpreted as not defining the env variable.
|
|
var = os::GetEnvVar("HSA_XNACK");
|
|
xnack_ = (var == "0") ? XNACK_DISABLE : ((var == "1") ? XNACK_ENABLE : XNACK_UNCHANGED);
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_DEBUG");
|
|
debug_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_CU_MASK_SKIP_INIT");
|
|
cu_mask_skip_init_ = (var == "1") ? true : false;
|
|
|
|
// Temporary opt-in for corrected HSA_AMD_AGENT_INFO_COOPERATIVE_COMPUTE_UNIT_COUNT behavior.
|
|
// Will become opt-out and possibly removed in future releases.
|
|
var = os::GetEnvVar("HSA_COOP_CU_COUNT");
|
|
coop_cu_count_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_DISCOVER_COPY_AGENTS");
|
|
discover_copy_agents_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_SVM_PROFILE");
|
|
svm_profile_ = var;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_SRAMECC");
|
|
sramecc_enable_ =
|
|
(var == "0") ? SRAMECC_DISABLED : ((var == "1") ? SRAMECC_ENABLED : SRAMECC_DEFAULT);
|
|
|
|
var = os::GetEnvVar("HSA_IMAGE_PRINT_SRD");
|
|
image_print_srd_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_MWAITX");
|
|
enable_mwaitx_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_ENABLE_IPC_MODE_LEGACY");
|
|
enable_ipc_mode_legacy_ = (var == "0") ? false : true; // Legacy mode by default
|
|
if (os::IsEnvVarSet("HSA_PCS_MAX_DEVICE_BUFFER_SIZE")) {
|
|
var = os::GetEnvVar("HSA_PCS_MAX_DEVICE_BUFFER_SIZE");
|
|
char* end;
|
|
pc_sampling_max_device_buffer_size_ = strtoul(var.c_str(), &end, 10);
|
|
} else {
|
|
pc_sampling_max_device_buffer_size_ = DEFAULT_PCS_MAX_DEVICE_BUFFER_SIZE;
|
|
}
|
|
|
|
// Temporary environment variable to disable CPU affinity override
|
|
// Will either rename to HSA_OVERRIDE_CPU_AFFINITY later or remove completely.
|
|
var = os::GetEnvVar("HSA_OVERRIDE_CPU_AFFINITY_DEBUG");
|
|
override_cpu_affinity_ = (var == "0") ? false : true;
|
|
|
|
var = os::GetEnvVar("HSA_ALLOCATE_QUEUE_DEV_MEM");
|
|
dev_mem_queue_buf_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_WAIT_ANY_DEBUG");
|
|
wait_any_ = (var == "1") ? true : false;
|
|
|
|
/* hsa_signal_wait_relaxed abort timeout */
|
|
var = os::GetEnvVar("HSA_SIGNAL_WAIT_ABORT_TIMEOUT");
|
|
signal_abort_timeout_ = var.empty() ? 0 : atoi(var.c_str());
|
|
|
|
/* Valid inputs are 0-99, HIGH, MAX */
|
|
var = os::GetEnvVar("HSA_ASYNCEVENTS_THREAD_PRIORITY");
|
|
async_events_thread_priority_ = os::OS_THREAD_PRIORITY_DEFAULT;
|
|
if (var == "MAX") {
|
|
async_events_thread_priority_ = os::OS_THREAD_PRIORITY_MAX;
|
|
} else if (var == "HIGH") {
|
|
async_events_thread_priority_ = os::OS_THREAD_PRIORITY_HIGH;
|
|
} else if (var != "") {
|
|
char* end;
|
|
int input = strtol(var.c_str(), &end, 10);
|
|
if (input >= 0 && input <= 99)
|
|
async_events_thread_priority_ = input;
|
|
else
|
|
fprintf(stderr, "Failed to parse HSA_ASYNCEVENTS_THREAD_PRIORITY");
|
|
}
|
|
|
|
var = os::GetEnvVar("HSA_IMAGE_ENABLE_3D_SWIZZLE_DEBUG");
|
|
enable_3d_swizzle_ = (var == "1") ? true : false;
|
|
|
|
// This allows convient usage in scripting for enabling dtif.
|
|
// IE the user should set HSA_DTIF_ENABLED = 1 to enable DTIF.
|
|
// HSA_DTIF_ENABLED = 0 will disable DTIF backend.
|
|
var = os::GetEnvVar("HSA_ENABLE_DTIF");
|
|
enable_dtif_ = (var == "1") ? true : false;
|
|
|
|
var = os::GetEnvVar("HSA_CO_DMACOPY_SIZE");
|
|
co_dmacopy_size_ = var.empty() ? 1024*1024 : atoi(var.c_str());
|
|
}
|
|
|
|
void parse_masks(uint32_t maxGpu, uint32_t maxCU) {
|
|
std::string var = os::GetEnvVar("HSA_CU_MASK");
|
|
parse_masks(var, maxGpu, maxCU);
|
|
}
|
|
|
|
bool wait_any() const { return wait_any_; }
|
|
|
|
bool check_flat_scratch() const { return check_flat_scratch_; }
|
|
|
|
bool enable_vm_fault_message() const { return enable_vm_fault_message_; }
|
|
|
|
bool enable_queue_fault_message() const { return enable_queue_fault_message_; }
|
|
|
|
bool enable_interrupt() const { return enable_interrupt_; }
|
|
|
|
bool enable_sdma_hdp_flush() const { return enable_sdma_hdp_flush_; }
|
|
|
|
bool running_valgrind() const { return running_valgrind_; }
|
|
|
|
bool sdma_wait_idle() const { return sdma_wait_idle_; }
|
|
|
|
bool report_tool_load_failures() const { return report_tool_load_failures_; }
|
|
|
|
bool report_tool_register_failures() const { return report_tool_register_failures_; }
|
|
|
|
bool disable_tool_register() const { return disable_tool_register_; }
|
|
|
|
bool disable_fragment_alloc() const { return disable_fragment_alloc_; }
|
|
|
|
bool rev_copy_dir() const { return rev_copy_dir_; }
|
|
|
|
bool fine_grain_pcie() const { return fine_grain_pcie_; }
|
|
|
|
bool no_scratch_reclaim() const { return no_scratch_reclaim_; }
|
|
|
|
bool no_scratch_thread_limiter() const { return no_scratch_thread_limit_; }
|
|
|
|
SDMA_OVERRIDE enable_sdma() const { return enable_sdma_; }
|
|
|
|
SDMA_OVERRIDE enable_peer_sdma() const { return enable_peer_sdma_; }
|
|
|
|
SDMA_OVERRIDE enable_sdma_gang() const { return enable_sdma_gang_; }
|
|
|
|
SDMA_OVERRIDE enable_sdma_copy_size_override() const { return enable_sdma_copy_size_override_; }
|
|
|
|
SDMA_OVERRIDE enable_sdma_recommended_eng() const { return enable_sdma_recommended_eng_; }
|
|
|
|
std::string visible_gpus() const { return visible_gpus_; }
|
|
|
|
bool filter_visible_gpus() const { return filter_visible_gpus_; }
|
|
|
|
uint32_t max_queues() const { return max_queues_; }
|
|
|
|
size_t scratch_mem_size() const { return scratch_mem_size_; }
|
|
|
|
size_t scratch_single_limit() const { return scratch_single_limit_; }
|
|
|
|
bool enable_scratch_async_reclaim() const { return enable_scratch_async_reclaim_; }
|
|
|
|
bool enable_scratch_alt() const { return enable_scratch_alt_; }
|
|
|
|
size_t scratch_single_limit_async() const { return scratch_single_limit_async_; }
|
|
|
|
std::string tools_lib_names() const { return tools_lib_names_; }
|
|
|
|
bool disable_image() const { return disable_image_; }
|
|
|
|
bool disable_pc_sampling() const { return disable_pc_sampling_; }
|
|
|
|
bool loader_enable_mmap_uri() const { return loader_enable_mmap_uri_; }
|
|
|
|
size_t force_sdma_size() const { return force_sdma_size_; }
|
|
|
|
bool check_sramecc_validity() const { return check_sramecc_validity_; }
|
|
|
|
bool override_cpu_affinity() const { return override_cpu_affinity_; }
|
|
|
|
bool image_print_srd() const { return image_print_srd_; }
|
|
|
|
bool check_mwaitx(bool mwaitx_supported) {
|
|
if (enable_mwaitx_ && !mwaitx_supported) enable_mwaitx_ = false;
|
|
|
|
return enable_mwaitx_;
|
|
}
|
|
|
|
XNACK_REQUEST xnack() const { return xnack_; }
|
|
|
|
bool debug() const { return debug_; }
|
|
|
|
const std::vector<uint32_t>& cu_mask(uint32_t gpu_index) const {
|
|
static const std::vector<uint32_t> empty;
|
|
auto it = cu_mask_.find(gpu_index);
|
|
if (it == cu_mask_.end()) return empty;
|
|
return it->second;
|
|
}
|
|
|
|
bool cu_mask_skip_init() const { return cu_mask_skip_init_; }
|
|
|
|
bool coop_cu_count() const { return coop_cu_count_; }
|
|
|
|
bool discover_copy_agents() const { return discover_copy_agents_; }
|
|
|
|
const std::string& svm_profile() const { return svm_profile_; }
|
|
|
|
SRAMECC_ENABLE sramecc_enable() const { return sramecc_enable_; }
|
|
|
|
bool enable_ipc_mode_legacy() const { return enable_ipc_mode_legacy_; }
|
|
|
|
size_t pc_sampling_max_device_buffer_size() const { return pc_sampling_max_device_buffer_size_; }
|
|
|
|
size_t co_dmacopy_size() const { return co_dmacopy_size_; }
|
|
|
|
bool dev_mem_queue_buf() const { return dev_mem_queue_buf_; }
|
|
|
|
uint32_t signal_abort_timeout() const { return signal_abort_timeout_; }
|
|
|
|
int async_events_thread_priority() const { return async_events_thread_priority_; }
|
|
|
|
bool enable_3d_swizzle() const { return enable_3d_swizzle_; }
|
|
|
|
bool enable_dtif() const { return enable_dtif_; }
|
|
private:
|
|
bool check_flat_scratch_;
|
|
bool enable_vm_fault_message_;
|
|
bool enable_interrupt_;
|
|
bool enable_sdma_hdp_flush_;
|
|
bool running_valgrind_;
|
|
bool sdma_wait_idle_;
|
|
bool enable_queue_fault_message_;
|
|
bool report_tool_load_failures_;
|
|
bool report_tool_register_failures_ = false;
|
|
bool disable_tool_register_ = false;
|
|
bool disable_fragment_alloc_;
|
|
bool rev_copy_dir_;
|
|
bool fine_grain_pcie_;
|
|
bool no_scratch_reclaim_;
|
|
bool no_scratch_thread_limit_;
|
|
bool disable_image_;
|
|
bool disable_pc_sampling_;
|
|
bool loader_enable_mmap_uri_;
|
|
bool check_sramecc_validity_;
|
|
bool debug_;
|
|
bool cu_mask_skip_init_;
|
|
bool coop_cu_count_;
|
|
bool discover_copy_agents_;
|
|
bool override_cpu_affinity_;
|
|
bool image_print_srd_;
|
|
bool enable_mwaitx_;
|
|
bool enable_ipc_mode_legacy_;
|
|
bool wait_any_;
|
|
bool dev_mem_queue_buf_;
|
|
uint32_t signal_abort_timeout_;
|
|
int async_events_thread_priority_;
|
|
bool enable_3d_swizzle_ = false;
|
|
bool enable_dtif_;
|
|
|
|
SDMA_OVERRIDE enable_sdma_;
|
|
SDMA_OVERRIDE enable_peer_sdma_;
|
|
SDMA_OVERRIDE enable_sdma_gang_;
|
|
SDMA_OVERRIDE enable_sdma_copy_size_override_;
|
|
SDMA_OVERRIDE enable_sdma_recommended_eng_;
|
|
|
|
bool filter_visible_gpus_;
|
|
std::string visible_gpus_;
|
|
|
|
uint32_t max_queues_;
|
|
|
|
size_t scratch_mem_size_;
|
|
size_t scratch_single_limit_;
|
|
size_t scratch_single_limit_async_;
|
|
bool enable_scratch_async_reclaim_;
|
|
bool enable_scratch_alt_;
|
|
|
|
std::string tools_lib_names_;
|
|
std::string svm_profile_;
|
|
|
|
size_t force_sdma_size_;
|
|
|
|
// Indicates user preference for Xnack state.
|
|
XNACK_REQUEST xnack_;
|
|
|
|
SRAMECC_ENABLE sramecc_enable_;
|
|
|
|
size_t pc_sampling_max_device_buffer_size_;
|
|
|
|
size_t co_dmacopy_size_;
|
|
|
|
// Map GPU index post RVD to its default cu mask.
|
|
std::map<uint32_t, std::vector<uint32_t>> cu_mask_;
|
|
|
|
void parse_masks(std::string& args, uint32_t maxGpu, uint32_t maxCU);
|
|
|
|
DISALLOW_COPY_AND_ASSIGN(Flag);
|
|
};
|
|
|
|
} // namespace rocr
|
|
|
|
#endif // header guard
|