From ebe22b59076479682f260cdae7246c2818c0f1d1 Mon Sep 17 00:00:00 2001 From: vedithal-amd Date: Fri, 9 Jan 2026 09:06:52 -0500 Subject: [PATCH] Add pre-processor guards for rocflop (#2534) --- .../rocprofiler-compute/sample/rocflop.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/projects/rocprofiler-compute/sample/rocflop.cpp b/projects/rocprofiler-compute/sample/rocflop.cpp index e8133da9e0..4f4253c1d9 100644 --- a/projects/rocprofiler-compute/sample/rocflop.cpp +++ b/projects/rocprofiler-compute/sample/rocflop.cpp @@ -112,8 +112,8 @@ __global__ void matmul_fp32_throughput(float* inputs, vec4* outputs, int } #endif // !defined(__gfx906__) -// SMFMAC (Sparse MFMA) instructions are only available on gfx90a and later (not on gfx906 or gfx908) -#if !defined(__gfx906__) && !defined(__gfx908__) +// SMFMAC (Sparse MFMA) instructions are only available on gfx940 and later (not on gfx906, gfx908, or gfx90a) +#if !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__) __global__ void sparse_matmul_fp16_throughput(vec4* input0, vec8* input1, vec4* outputs, int count) { int grid_size = gridDim.x * blockDim.x; @@ -149,7 +149,7 @@ __global__ void sparse_matmul_fp16_throughput(vec4* input0, vec8 double matmul_throughput_test(int devic } #endif // !defined(__gfx906__) -#if !defined(__gfx906__) && !defined(__gfx908__) +#if !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__) template double sparse_matmul_throughput_test(int device, int count, int runs = 1) { const int wave_size = 64; @@ -376,7 +376,7 @@ template double sparse_matmul_throughput_test(in return flops; } -#endif // !defined(__gfx906__) && !defined(__gfx908__) +#endif // !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__) struct Result { int device = -1; @@ -480,17 +480,17 @@ Result run_tests(int device, int runs, uint32_t mask) } #endif -#if !defined(__gfx906__) && !defined(__gfx908__) +#if !defined(__gfx906__) && !defined(__gfx908__) && !defined(__gfx90a__) if(mask & SMATRIX_FP16) { - // SMFMAC only available on gfx90a (MI200) and later, not on gfx906 or gfx908 - if(arch.major == 0x9 && (arch.minor > 0x4 || (arch.minor == 0 && arch.rev >= 0xa))) { + // SMFMAC only available on gfx940 (MI300) and later, not on gfx906, gfx908, or gfx90a + if(arch.major == 0x9 && arch.minor >= 0x4) { res.smfmac_fp16 = sparse_matmul_throughput_test(device, 4096, runs); } else { res.smfmac_fp16 = 0; } } #else - // SMFMAC not available when compiling for gfx906 or gfx908 + // SMFMAC not available when compiling for gfx906, gfx908, or gfx90a if(mask & SMATRIX_FP16) { res.smfmac_fp16 = 0; }