From 2a64fa5e06e80e0af36df4ce0c76ae52eeec0a9d Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Thu, 9 Jan 2025 11:00:32 -0500 Subject: [PATCH] libhsakmt: gfx950: Add option to enable HIGH_PRECISION Environment variable HSA_HIGH_PRECISION_MODE can be used to control MFMA precision: any value other than "0" requests high-precision mode, and the request is passed to KFD only for gfx950 nodes, through the misc_process_flag field (formerly pad) of the set-memory-policy ioctl arguments. Signed-off-by: Harish Kasiviswanathan Change-Id: Ib78dd9dd8867025e090a3cca96ab6db4f65dea12 --- libhsakmt/include/hsakmt/linux/kfd_ioctl.h | 5 ++++- libhsakmt/src/fmm.c | 15 +++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/libhsakmt/include/hsakmt/linux/kfd_ioctl.h b/libhsakmt/include/hsakmt/linux/kfd_ioctl.h index 675722c2a0..bc388a7529 100644 --- a/libhsakmt/include/hsakmt/linux/kfd_ioctl.h +++ b/libhsakmt/include/hsakmt/linux/kfd_ioctl.h @@ -163,7 +163,7 @@ struct kfd_ioctl_set_memory_policy_args { __u32 gpu_id; /* to KFD */ __u32 default_policy; /* to KFD */ __u32 alternate_policy; /* to KFD */ - __u32 pad; + __u32 misc_process_flag; /* to KFD */ }; /* @@ -363,6 +363,9 @@ enum kfd_dbg_trap_exception_code { #define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) +/* Misc. 
per process flags */ +#define ENABLE_MFMA_HIGH_PRECISION (1 << 0) + enum kfd_dbg_runtime_state { DEBUG_RUNTIME_STATE_DISABLED = 0, DEBUG_RUNTIME_STATE_ENABLED = 1, diff --git a/libhsakmt/src/fmm.c b/libhsakmt/src/fmm.c index 0e9195b987..6ddf363742 100644 --- a/libhsakmt/src/fmm.c +++ b/libhsakmt/src/fmm.c @@ -2004,7 +2004,8 @@ HSAKMT_STATUS hsakmt_fmm_release(void *address) } static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_policy, - uintptr_t alt_base, uint64_t alt_size) + uintptr_t alt_base, uint64_t alt_size, + uint32_t misc_process_flags) { struct kfd_ioctl_set_memory_policy_args args = {0}; @@ -2013,6 +2014,7 @@ static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_po args.alternate_policy = alt_policy; args.alternate_aperture_base = alt_base; args.alternate_aperture_size = alt_size; + args.misc_process_flag = misc_process_flags; return hsakmt_ioctl(hsakmt_kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args); } @@ -2517,10 +2519,10 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes) uint32_t num_of_sysfs_nodes; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr, *reserveSvm; - char *maxVaAlignStr; + char *maxVaAlignStr, *mfmaHighPrecisionModeStr; unsigned int guardPages = 1; uint64_t svm_base = 0, svm_limit = 0; - uint32_t svm_alignment = 0; + uint32_t svm_alignment = 0, mfma_high_precision_mode = 0; /* If HSA_DISABLE_CACHE is set to a non-0 value, disable caching */ disableCache = getenv("HSA_DISABLE_CACHE"); @@ -2549,6 +2551,9 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes) if (!guardPagesStr || sscanf(guardPagesStr, "%u", &guardPages) != 1) guardPages = 1; + mfmaHighPrecisionModeStr = getenv("HSA_HIGH_PRECISION_MODE"); + mfma_high_precision_mode = (mfmaHighPrecisionModeStr && + strcmp(mfmaHighPrecisionModeStr, "0")); /* Sets the max VA alignment order size during mapping. 
By default the order * size is set to 18(1G) for GFX950 to reduce TLB hits. If any non-gfx950 * ASIC is found in the system, set back to 9(2MB). @@ -2800,7 +2805,9 @@ HSAKMT_STATUS hsakmt_fmm_init_process_apertures(unsigned int NumNodes) KFD_IOC_CACHE_POLICY_COHERENT : KFD_IOC_CACHE_POLICY_NONCOHERENT, KFD_IOC_CACHE_POLICY_COHERENT, - alt_base, alt_size); + alt_base, alt_size, + hsakmt_get_gfxv_by_node_id(i) == GFX_VERSION_GFX950 ? + mfma_high_precision_mode : 0); if (err) { pr_err("Failed to set mem policy for GPU [0x%x]\n", process_apertures[i].gpu_id);