From 3dc4148c4695d268afee7c90f4ccd13a184be8b2 Mon Sep 17 00:00:00 2001 From: "Indic, Vladimir" Date: Tue, 15 Apr 2025 23:04:19 +0200 Subject: [PATCH] MI300 Stochastic PC Sampling Documentation and Changelog (#336) * MI300 Stochastic PC Sampling Documentation * Stochastic PC sampling title renaming --------- Co-authored-by: Welton, Benjamin [ROCm/rocprofiler-sdk commit: 96a0ef244f467e737d7774ada4276e06bd9aadab] --- projects/rocprofiler-sdk/CHANGELOG.md | 8 +- .../data/pc_sampling_stochastic_debug.csv | 98 ++++++++++++++ .../source/docs/how-to/using-pc-sampling.rst | 125 ++++++++++++++++++ 3 files changed, 228 insertions(+), 3 deletions(-) create mode 100644 projects/rocprofiler-sdk/source/docs/data/pc_sampling_stochastic_debug.csv diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index d3903df512..3f2e33c0bb 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -172,9 +172,11 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - Added perfetto support for counter collection. - Added support for negating rocprofv3 tracing options when using aggregate options, e.g. 
`--sys-trace --hsa-trace=no` - Added `--agent-index` option in rocprofv3 to specify the agent naming convention in the output - - absolute == node_id - - relative == logical_node_id - - type-relative == logical_node_type_id + - absolute == node_id + - relative == logical_node_id + - type-relative == logical_node_type_id +- Added MI300 stochastic (hardware-based) PC sampling support in ROCProfiler-SDK and ROCProfV3 + ### Changed diff --git a/projects/rocprofiler-sdk/source/docs/data/pc_sampling_stochastic_debug.csv b/projects/rocprofiler-sdk/source/docs/data/pc_sampling_stochastic_debug.csv new file mode 100644 index 0000000000..d47551958e --- /dev/null +++ b/projects/rocprofiler-sdk/source/docs/data/pc_sampling_stochastic_debug.csv @@ -0,0 +1,98 @@ +"Sample_Timestamp","Exec_Mask","Dispatch_Id","Instruction","Instruction_Comment","Correlation_Id","Wave_Issued_Instruction","Instruction_Type","Stall_Reason","Wave_Count" +390705261841337,18446744073709551615,24,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",24,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390705261924637,18446744073709551615,29,"v_max_i32_e32 v1, v2, v0","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:77",29,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",6 +390705694732429,18446744073709551615,53,"v_mad_u64_u32 v[0:1], s[2:3], v0, s2, v[2:3]","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:80",53,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",6 +390705694744189,18446744073709551615,54,"v_lshl_add_u64 v[0:1], s[4:5], 0, 
v[0:1]","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",54,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",4 +390705694769549,18446744073709551615,56,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",56,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390705694772089,18446744073709551615,56,"s_waitcnt lgkmcnt(0)","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",56,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390705694810449,18446744073709551615,58,"v_cmp_gt_i32_e32 vcc, s2, v1","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:94",58,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",3 +390705694820489,18446744073709551615,59,"s_waitcnt vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",59,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390705694840850,18446744073709551615,60,"s_and_b32 s5, s4, 0xffff","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",60,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",5 +390705694856630,18446744073709551615,61,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",61,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706112944694,18446744073709551615,65,"s_waitcnt 
vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",65,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",7 +390706112965404,18446744073709551615,66,"global_store_dword v[0:1], v2, off","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",66,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",3 +390706112966284,18446744073709551615,66,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",66,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706112966644,18446744073709551615,66,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",66,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",3 +390706112967404,18446744073709551615,66,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",66,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706112971414,18446744073709551615,66,"s_load_dwordx4 s[4:7], s[0:1], 0x0","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",66,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",5 +390706112984885,18446744073709551615,67,"s_waitcnt vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",67,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706112988655,18446744073709551615,67,"s_waitcnt 
vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",67,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113000775,18446744073709551615,68,"v_add_u32_e32 v0, s3, v0","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:128",68,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",5 +390706113004375,18446744073709551615,68,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",68,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113053815,18446744073709551615,69,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",69,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",7 +390706113059125,18446744073709551615,69,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",69,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113080805,18446744073709551615,70,"s_load_dwordx4 s[4:7], s[0:1], 0x0","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",70,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",3 +390706113097725,18446744073709551615,71,"s_waitcnt vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",71,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",3 +390706113101805,18446744073709551615,71,"s_waitcnt 
vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",71,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113111775,18446744073709551615,72,"v_sub_f32_e32 v4, v2, v3","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",72,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",5 +390706113115735,18446744073709551615,72,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",72,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113134725,18446744073709551615,73,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",73,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",7 +390706113147605,18446744073709551615,74,"s_waitcnt lgkmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:97",74,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113149485,18446744073709551615,74,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",74,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113153735,18446744073709551615,74,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",74,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113179326,18446744073709551615,76,"s_waitcnt 
lgkmcnt(0)","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",76,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",2 +390706113184086,18446744073709551615,76,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",76,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113184406,18446744073709551615,76,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",76,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113206736,18446744073709551615,77,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",77,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",10 +390706113209216,18446744073709551615,77,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",77,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",11 +390706113220016,18446744073709551615,78,"s_load_dword s4, s[0:1], 0x2c","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",78,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",1 +390706113221566,18446744073709551615,78,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",78,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113227816,18446744073709551615,78,"s_waitcnt 
vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",78,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113234976,18446744073709551615,79,"s_waitcnt vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",79,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113235016,18446744073709551615,79,"s_load_dwordx2 s[0:1], s[0:1], 0x10","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",79,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",3 +390706113236806,18446744073709551615,79,"s_and_saveexec_b64 s[2:3], vcc","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:113",79,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",3 +390706113250926,18446744073709551615,80,"s_waitcnt lgkmcnt(0)","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",80,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",1 +390706113253456,18446744073709551615,80,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",80,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113255496,18446744073709551615,80,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",80,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113257566,18446744073709551615,80,"v_add_f32_e32 v2, 1.0, 
v2","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",80,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",4 +390706113270176,18446744073709551615,81,"s_waitcnt lgkmcnt(0)","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",81,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",3 +390706113278256,18446744073709551615,81,"s_load_dword s2, s[0:1], 0x18","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",81,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",8 +390706113292776,18446744073709551615,82,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",82,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",3 +390706113301126,18446744073709551615,83,"s_waitcnt lgkmcnt(0)","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",83,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",1 +390706113301606,18446744073709551615,83,"s_and_saveexec_b64 s[2:3], vcc","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:113",83,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",3 +390706113303846,18446744073709551615,83,"s_and_saveexec_b64 s[2:3], vcc","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:113",83,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",3 +390706113305086,18446744073709551615,83,"v_lshlrev_b64 v[0:1], 2, 
v[0:1]","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",83,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",3 +390706113317256,18446744073709551615,84,"v_div_fmas_f32 v3, v3, v6, v7","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",84,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",5 +390706113318166,18446744073709551615,84,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",84,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113336687,18446744073709551615,85,"global_load_dword v2, v[2:3], off","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",85,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",11 +390706113351087,18446744073709551615,86,"s_mul_i32 s2, s2, s5","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:93",86,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",3 +390706113352487,18446744073709551615,86,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",86,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113369607,18446744073709551615,87,"s_waitcnt vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",87,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113373647,18446744073709551615,87,"s_waitcnt 
vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",87,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113387017,18446744073709551615,88,"s_waitcnt lgkmcnt(0)","/usr/include/hip/amd_detail/amd_hip_runtime.h:275",88,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",1 +390706113390207,18446744073709551615,88,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",88,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113408977,18446744073709551615,89,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",89,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706113409057,18446744073709551615,89,"v_add_f32_e32 v2, v2, v3","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",89,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",10 +390706113411607,18446744073709551615,89,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",89,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",9 +390706113411737,18446744073709551615,89,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",89,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",7 +390706113412777,18446744073709551615,89,"s_waitcnt 
vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",89,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",7 +390706113415847,18446744073709551615,89,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",89,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",9 +390706113424217,18446744073709551615,90,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",90,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113443127,18446744073709551615,91,"v_add_f32_e32 v2, -1.0, v2","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",91,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",2 +390706113444857,18446744073709551615,91,"v_add_f32_e32 v3, -1.0, v3","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",91,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",3 +390706113459367,18446744073709551615,92,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",92,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113459767,18446744073709551615,92,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",92,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113462927,18446744073709551615,92,"s_waitcnt 
vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",92,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706113480097,18446744073709551615,93,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",15 +390706113484087,18446744073709551615,93,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",7 +390706113496167,18446744073709551615,93,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113500167,18446744073709551615,93,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",9 +390706113506057,18446744073709551615,93,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",14 +390706113506327,18446744073709551615,93,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706113508577,18446744073709551615,93,"s_waitcnt 
vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",17 +390706113522058,18446744073709551615,93,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",93,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",15 +390706113561378,18446744073709551615,94,"s_waitcnt lgkmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:97",94,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",3 +390706113573138,18446744073709551615,95,"s_waitcnt vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",95,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706526501642,18446744073709551615,112,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",112,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706526582893,18446744073709551615,117,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:82",117,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",8 +390706526594683,18446744073709551615,118,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",118,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706526629813,18446744073709551615,120,"s_waitcnt 
lgkmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:131",120,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706526629803,18446744073709551615,120,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",120,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706526633683,18446744073709551615,120,"v_mul_f32_e32 v7, v3, v6","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",120,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",4 +390706526634933,18446744073709551615,120,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",120,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706526665053,18446744073709551615,122,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",122,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",4 +390706526677283,18446744073709551615,123,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",123,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",6 +390706526695733,18446744073709551615,124,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:133",124,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706526809694,18446744073709551615,126,"v_and_b32_e32 v2, 0x7fffffff, 
v2","/opt/rocm-6.5.0/lib/llvm/lib/clang/19/include/__clang_hip_math.h:427",126,1,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT",5 +390706526810014,18446744073709551615,126,"s_waitcnt vmcnt(0)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:99",126,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 +390706526821284,18446744073709551615,127,"s_waitcnt vmcnt(1)","/home/vlaindic/git/rocprofiler-sdk-internal/tests/bin/vector-operations/vector-ops.cpp:116",127,0,"ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST","ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",5 diff --git a/projects/rocprofiler-sdk/source/docs/how-to/using-pc-sampling.rst b/projects/rocprofiler-sdk/source/docs/how-to/using-pc-sampling.rst index 42d0e19c0e..824b25bee3 100644 --- a/projects/rocprofiler-sdk/source/docs/how-to/using-pc-sampling.rst +++ b/projects/rocprofiler-sdk/source/docs/how-to/using-pc-sampling.rst @@ -180,3 +180,128 @@ The preceding command generates a JSON file with the comprehensive output. Here } For description of the fields in the JSON output, see :ref:`output-file-fields`. + + +Hardware-Based (Stochastic) PC Sampling Method +=============================================== + +The new ``ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC`` has been introduced for the gfx942 architecture. +It employs specific hardware for probing waves actively running on the GPU. +Besides the information already provided with ``ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP`` useful for determining hot-spots within the kernel, +it delivers additional information that tells whether a sampled wave issued an instruction represented with a particular PC. +If not, it tells the reason for not issuing the instruction (stall reason). +This type of information is particularly useful for understanding stalls during the kernel execution. 
+ +To use this method on gfx942, we recommend listing available PC sampling configurations to verify if the latest ROCm stack is installed +on the system by running: + +.. code-block:: bash + + rocprofv3 -L + +Output similar to the following indicates that the ``ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC`` method is available: + +.. code-block:: bash + + Method: ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC + Unit: ROCPROFILER_PC_SAMPLING_UNIT_CYCLES + Minimum_Interval: 256 + Maximum_Interval: 2147483648 + +Please note that on gfx942, ``ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC`` requires intervals to be specified in cycles whose values are powers of 2. + +To profile a gfx942 accelerated application with ``ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC`` PC sampling, one can use the following command: + +.. code-block:: bash + + rocprofv3 --pc-sampling-beta-enabled --pc-sampling-method stochastic --pc-sampling-unit cycles --pc-sampling-interval 1048576 --output-format csv, json -- + +The previous command serializes samples in both CSV and JSON output formats in the ``pc_sampling_stochastic.csv`` and ``out_results.json`` files, respectively. + +Comparing the ``pc_sampling_stochastic.csv`` to ``pc_sampling_host_trap`` from the previous section, one can notice that the ``ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC`` method +generates additional fields: +- ``Wave_Issued_Instruction``: Indicates whether the wave issued an instruction (value 1) represented with a particular PC or not (value 0) +- ``Instruction_Type``: If the value of ``Wave_Issued_Instruction`` is 1, this field indicates the type of the issued instruction. Otherwise, this field is irrelevant. +- ``Stall_Reason``: If the value of ``Wave_Issued_Instruction`` is 0, this field indicates the reason for not issuing the instruction (stall reason). Otherwise, this field is irrelevant. +- ``Wave_Count``: Total number of waves actively running on a compute unit when the sample was generated. + +.. 
csv-table:: PC sampling stochastic with debug symbols + :file: /data/pc_sampling_stochastic_debug.csv + :widths: 20,10,10,10,10,20,10,20,20,10 + :header-rows: 1 + +Similarly, the ``ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC`` method delivers additional information to every sample in the JSON output. +The following snippet shows one sample from the ``out_results.json`` file. + +.. code-block:: text + + { + "record": { + "flags": { + "has_mem_cnt": 0 + }, + "hw_id": { + "chiplet": 4, + "wave_id": 0, + "simd_id": 2, + "pipe_id": 3, + "cu_or_wgp_id": 1, + "shader_array_id": 0, + "shader_engine_id": 3, + "workgroup_id": 0, + "vm_id": 3, + "queue_id": 2, + "microengine_id": 1 + }, + "pc": { + "code_object_id": 2, + "code_object_offset": 13880 + }, + "exec_mask": 18446744073709551615, + "timestamp": 390705261924637, + "dispatch_id": 29, + "corr_id": { + "internal": 29, + "external": 0 + }, + "wrkgrp_id": { + "x": 9, + "y": 489, + "z": 0 + }, + "wave_in_grp": 0, + "wave_issued": 1, + "inst_type": "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU", + "wave_cnt": 6, + "snapshot": { + "stall_reason": "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT", + "dual_issue_valu": 0, + "arb_state_issue_valu": 1, + "arb_state_issue_matrix": 0, + "arb_state_issue_lds": 0, + "arb_state_issue_lds_direct": 0, + "arb_state_issue_scalar": 0, + "arb_state_issue_vmem_tex": 0, + "arb_state_issue_flat": 0, + "arb_state_issue_exp": 0, + "arb_state_issue_misc": 0, + "arb_state_issue_brmsg": 0, + "arb_state_stall_valu": 0, + "arb_state_stall_matrix": 0, + "arb_state_stall_lds": 0, + "arb_state_stall_lds_direct": 0, + "arb_state_stall_scalar": 0, + "arb_state_stall_vmem_tex": 0, + "arb_state_stall_flat": 0, + "arb_state_stall_exp": 0, + "arb_state_stall_misc": 0, + "arb_state_stall_brmsg": 0 + } + }, + "inst_index": 1 + }, + +Fields starting with ``arb_state_`` are of particular interest as they indicate the state of the arbiter at the time of sampling. 
+Namely, ``arb_state_issue_`` fields indicate what type of instructions the arbiter issued at the time of sampling. +On the other hand, ``arb_state_stall_`` fields indicate what type of instructions were stalled at the time of sampling. +This information is useful for understanding how many instructions per cycle (IPC) are issued.