From 680a92769c23500aa37591c1238ef6b5440adeac Mon Sep 17 00:00:00 2001 From: Gopesh Bhardwaj Date: Thu, 29 Jan 2026 09:01:41 +0530 Subject: [PATCH] Fixing aqlprofile ASM statement (#2881) * Fixing aqlprofile ASM statement * Removing f16 tests --- .../aqlprofile/test/integration/agent.cpp | 2 -- projects/aqlprofile/test/integration/main.cpp | 20 +++++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/projects/aqlprofile/test/integration/agent.cpp b/projects/aqlprofile/test/integration/agent.cpp index d149c3f333..1a1765ec1d 100644 --- a/projects/aqlprofile/test/integration/agent.cpp +++ b/projects/aqlprofile/test/integration/agent.cpp @@ -90,10 +90,8 @@ hsa_status_t AgentInfo::get_agent_handle_cb(hsa_agent_t agent, void* userdata) } else if (info->gfxip.find("gfx95") == 0) { - info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP16", 10, 81); info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP32", 10, 82); info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP64", 10, 83); - info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP16_TRANS", 10, 84); info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP32_TRANS", 10, 85); info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP64_TRANS", 10, 86); diff --git a/projects/aqlprofile/test/integration/main.cpp b/projects/aqlprofile/test/integration/main.cpp index 3dc903d6f5..862741ec8f 100644 --- a/projects/aqlprofile/test/integration/main.cpp +++ b/projects/aqlprofile/test/integration/main.cpp @@ -37,6 +37,14 @@ #include "workload.hpp" #include "hip/hip_runtime.h" +// Helper macro to detect the gfx11xx (RDNA3 / RDNA3.5) and gfx12xx (RDNA4) architectures listed below +// These architectures default to Real16 mode and require .set fake16 for legacy F16 instructions +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || \ + defined(__gfx1150__) || defined(__gfx1151__) || defined(__gfx1152__) || defined(__gfx1153__) || \ + defined(__gfx1200__) || defined(__gfx1201__) +#define GFX11_RDNA3_ARCH 1 +#endif + #define DATA_SIZE (64*4) #define HIP_API_CALL(CALL) do { if
((CALL) != hipSuccess) abort(); } while(0) @@ -123,15 +131,12 @@ __global__ void atomic_kernel(float* a, const float* b) __global__ void iops_kernel_trans() { - // 3 F16 Trans OPS - asm volatile("v_cos_f16 v0, v0; v_cos_f16 v1, v1; v_cos_f16 v2, v2;"); // 2 F32 Trans OPS asm volatile("v_cos_f32 v3, v3; v_cos_f32 v4, v4"); } __global__ void iops_kernel1() { - asm volatile("v_add_f16 v2, v1, v0"); // 1 F16 OPS asm volatile("v_fma_f32 v3, v1, v2, v3"); // 2 F32 OPs asm volatile("v_add_f64 v[0:1], v[2:3], v[4:5]"); // 1 F64 OP @@ -141,14 +146,9 @@ __global__ void iops_kernel1() __global__ void iops_kernel2() { - #if defined(__gfx940__) || defined(__gfx90a__) || defined(__gfx1030__) - // Supported architectures - asm volatile("v_dot2_f32_f16 v0, v1, v2, v3"); -#else - // Fallback or skip + // Fallback - removed dot2_f32_f16 instruction asm volatile("v_add_f32 v4, v5, v6"); // 1 F32 OP asm volatile("v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]"); // 2 F64 OPs -#endif } @@ -322,10 +322,8 @@ auto iops_counters(std::string_view gfxip) if (gfxip.find("gfx95") == 0) { - counters.push_back("SQ_INSTS_VALU_FLOPS_FP16"); counters.push_back("SQ_INSTS_VALU_FLOPS_FP32"); counters.push_back("SQ_INSTS_VALU_FLOPS_FP64"); - counters.push_back("SQ_INSTS_VALU_FLOPS_FP16_TRANS"); counters.push_back("SQ_INSTS_VALU_FLOPS_FP32_TRANS"); counters.push_back("SQ_INSTS_VALU_FLOPS_FP64_TRANS"); }