libhsakmt: gfx950: Add option to enable HIGH_PRECISION

Environment variable HSA_HIGH_PRECISION_MODE can be used to control MFMA
precision

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Change-Id: Ib78dd9dd8867025e090a3cca96ab6db4f65dea12
This commit is contained in:
Harish Kasiviswanathan
2025-01-09 11:00:32 -05:00
committato da Yat Sin, David
parent 3be9c49b63
commit 2a64fa5e06
2 ha cambiato i file con 15 aggiunte e 5 eliminazioni
@@ -163,7 +163,7 @@ struct kfd_ioctl_set_memory_policy_args {
__u32 gpu_id; /* to KFD */
__u32 default_policy; /* to KFD */
__u32 alternate_policy; /* to KFD */
__u32 pad;
__u32 misc_process_flag; /* to KFD */
};
/*
@@ -363,6 +363,9 @@ enum kfd_dbg_trap_exception_code {
#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
(!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
/* Misc. per process flags */
#define ENABLE_MFMA_HIGH_PRECISION (1 << 0)
enum kfd_dbg_runtime_state {
DEBUG_RUNTIME_STATE_DISABLED = 0,
DEBUG_RUNTIME_STATE_ENABLED = 1,
+11 -4
Vedi File
@@ -2004,7 +2004,8 @@ HSAKMT_STATUS hsakmt_fmm_release(void *address)
}
static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_policy,
uintptr_t alt_base, uint64_t alt_size)
uintptr_t alt_base, uint64_t alt_size,
uint32_t misc_process_flags)
{
struct kfd_ioctl_set_memory_policy_args args = {0};
@@ -2013,6 +2014,7 @@ static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_po
args.alternate_policy = alt_policy;
args.alternate_aperture_base = alt_base;
args.alternate_aperture_size = alt_size;
args.misc_process_flag = misc_process_flags;
return hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
}
@@ -2517,10 +2519,10 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes)
uint32_t num_of_sysfs_nodes;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr, *reserveSvm;
char *maxVaAlignStr;
char *maxVaAlignStr, *mfmaHighPrecisionModeStr;
unsigned int guardPages = 1;
uint64_t svm_base = 0, svm_limit = 0;
uint32_t svm_alignment = 0;
uint32_t svm_alignment = 0, mfma_high_precision_mode = 0;
/* If HSA_DISABLE_CACHE is set to a non-0 value, disable caching */
disableCache = getenv("HSA_DISABLE_CACHE");
@@ -2549,6 +2551,9 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes)
if (!guardPagesStr || sscanf(guardPagesStr, "%u", &guardPages) != 1)
guardPages = 1;
mfmaHighPrecisionModeStr = getenv("HSA_HIGH_PRECISION_MODE");
mfma_high_precision_mode = (mfmaHighPrecisionModeStr &&
strcmp(mfmaHighPrecisionModeStr, "0"));
/* Sets the max VA alignment order size during mapping. By default the order
* size is set to 18(1G) for GFX950 to reduce TLB hits. If any non-gfx950
* ASIC is found in the system, set back to 9(2MB).
@@ -2800,7 +2805,9 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes)
KFD_IOC_CACHE_POLICY_COHERENT :
KFD_IOC_CACHE_POLICY_NONCOHERENT,
KFD_IOC_CACHE_POLICY_COHERENT,
alt_base, alt_size);
alt_base, alt_size,
hsakmt_get_gfxv_by_node_id(i) == GFX_VERSION_GFX950 ?
mfma_high_precision_mode : 0);
if (err) {
pr_err("Failed to set mem policy for GPU [0x%x]\n",
process_apertures[i].gpu_id);