Fixing aqlprofile ASM statement (#2881)
* Fixing aqlprofile ASM statement
* Removing f16 tests
This commit is contained in:
@@ -90,10 +90,8 @@ hsa_status_t AgentInfo::get_agent_handle_cb(hsa_agent_t agent, void* userdata)
|
|||||||
}
|
}
|
||||||
else if (info->gfxip.find("gfx95") == 0)
|
else if (info->gfxip.find("gfx95") == 0)
|
||||||
{
|
{
|
||||||
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP16", 10, 81);
|
|
||||||
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP32", 10, 82);
|
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP32", 10, 82);
|
||||||
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP64", 10, 83);
|
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP64", 10, 83);
|
||||||
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP16_TRANS", 10, 84);
|
|
||||||
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP32_TRANS", 10, 85);
|
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP32_TRANS", 10, 85);
|
||||||
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP64_TRANS", 10, 86);
|
info->add_event(sq, "SQ_INSTS_VALU_FLOPS_FP64_TRANS", 10, 86);
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,14 @@
|
|||||||
#include "workload.hpp"
|
#include "workload.hpp"
|
||||||
#include "hip/hip_runtime.h"
|
#include "hip/hip_runtime.h"
|
||||||
|
|
||||||
|
// Helper macro to detect RDNA3 (gfx110x), RDNA3.5 (gfx115x) and RDNA4 (gfx12xx) architectures
|
||||||
|
// These architectures default to Real16 mode and require .set fake16 for legacy F16 instructions
|
||||||
|
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || \
|
||||||
|
defined(__gfx1150__) || defined(__gfx1151__) || defined(__gfx1152__) || defined(__gfx1153__) || \
|
||||||
|
defined(__gfx1200__) || defined(__gfx1201__)
|
||||||
|
#define GFX11_RDNA3_ARCH 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#define DATA_SIZE (64*4)
|
#define DATA_SIZE (64*4)
|
||||||
|
|
||||||
#define HIP_API_CALL(CALL) do { if ((CALL) != hipSuccess) abort(); } while(0)
|
#define HIP_API_CALL(CALL) do { if ((CALL) != hipSuccess) abort(); } while(0)
|
||||||
@@ -123,15 +131,12 @@ __global__ void atomic_kernel(float* a, const float* b)
|
|||||||
|
|
||||||
__global__ void iops_kernel_trans()
|
__global__ void iops_kernel_trans()
|
||||||
{
|
{
|
||||||
// 3 F16 Trans OPS
|
|
||||||
asm volatile("v_cos_f16 v0, v0; v_cos_f16 v1, v1; v_cos_f16 v2, v2;");
|
|
||||||
// 2 F32 Trans OPS
|
// 2 F32 Trans OPS
|
||||||
asm volatile("v_cos_f32 v3, v3; v_cos_f32 v4, v4");
|
asm volatile("v_cos_f32 v3, v3; v_cos_f32 v4, v4");
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void iops_kernel1()
|
__global__ void iops_kernel1()
|
||||||
{
|
{
|
||||||
asm volatile("v_add_f16 v2, v1, v0"); // 1 F16 OPS
|
|
||||||
asm volatile("v_fma_f32 v3, v1, v2, v3"); // 2 F32 OPs
|
asm volatile("v_fma_f32 v3, v1, v2, v3"); // 2 F32 OPs
|
||||||
|
|
||||||
asm volatile("v_add_f64 v[0:1], v[2:3], v[4:5]"); // 1 F64 OP
|
asm volatile("v_add_f64 v[0:1], v[2:3], v[4:5]"); // 1 F64 OP
|
||||||
@@ -141,14 +146,9 @@ __global__ void iops_kernel1()
|
|||||||
|
|
||||||
__global__ void iops_kernel2()
|
__global__ void iops_kernel2()
|
||||||
{
|
{
|
||||||
#if defined(__gfx940__) || defined(__gfx90a__) || defined(__gfx1030__)
|
// Fallback - removed dot2_f32_f16 instruction
|
||||||
// Supported architectures
|
|
||||||
asm volatile("v_dot2_f32_f16 v0, v1, v2, v3");
|
|
||||||
#else
|
|
||||||
// Fallback or skip
|
|
||||||
asm volatile("v_add_f32 v4, v5, v6"); // 1 F32 OP
|
asm volatile("v_add_f32 v4, v5, v6"); // 1 F32 OP
|
||||||
asm volatile("v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]"); // 2 F64 OPs
|
asm volatile("v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]"); // 2 F64 OPs
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -322,10 +322,8 @@ auto iops_counters(std::string_view gfxip)
|
|||||||
|
|
||||||
if (gfxip.find("gfx95") == 0)
|
if (gfxip.find("gfx95") == 0)
|
||||||
{
|
{
|
||||||
counters.push_back("SQ_INSTS_VALU_FLOPS_FP16");
|
|
||||||
counters.push_back("SQ_INSTS_VALU_FLOPS_FP32");
|
counters.push_back("SQ_INSTS_VALU_FLOPS_FP32");
|
||||||
counters.push_back("SQ_INSTS_VALU_FLOPS_FP64");
|
counters.push_back("SQ_INSTS_VALU_FLOPS_FP64");
|
||||||
counters.push_back("SQ_INSTS_VALU_FLOPS_FP16_TRANS");
|
|
||||||
counters.push_back("SQ_INSTS_VALU_FLOPS_FP32_TRANS");
|
counters.push_back("SQ_INSTS_VALU_FLOPS_FP32_TRANS");
|
||||||
counters.push_back("SQ_INSTS_VALU_FLOPS_FP64_TRANS");
|
counters.push_back("SQ_INSTS_VALU_FLOPS_FP64_TRANS");
|
||||||
}
|
}
|
||||||
|
|||||||
Fai riferimento in un nuovo problema
Block a user