90e824c63b
Temporarily disable rocmtools because of hsa_shut_down issues Change-Id: I5e8b6729b8200ccdd5c399862bfc632ba69f884c Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
122 строки
10 KiB
Plaintext
122 строки
10 KiB
Plaintext
/*
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

// Field description table.
//
// Every entry in this file is one FLD_DESC_ENT(...) invocation.
// FLD_DESC_ENT may already be defined when this point is reached; if it is
// not, the fallback definition below makes each entry expand to nothing.
//
// Description Fields:
//   Arg #   Type          Meaning
//   -------------------------------------------------
//   1       rdc_field_t   enum of field
//   2       string        description of enum
//   3       string        rdci display label
//   4       bool          whether to display the field in rdci
#ifndef FLD_DESC_ENT
// Fallback: discard all four arguments so an entry produces no tokens.
#define FLD_DESC_ENT(ID, DESC, LABEL, DISPLAY)
#endif

//           rdc_field_t   Description   rdci label   To Display
//           ===========   ===========   ==========   ==========
// Entry layout: FLD_DESC_ENT(field id, description, rdci label, display in rdci)

// Device identification and general telemetry.
FLD_DESC_ENT(RDC_FI_INVALID, "Unknown/Invalid field", "INVALID", false)
FLD_DESC_ENT(RDC_FI_GPU_COUNT, "GPU count in the system", "GPU_COUNT", true)
FLD_DESC_ENT(RDC_FI_DEV_NAME, "Name of the device", "DEV_NAME", true)
FLD_DESC_ENT(RDC_FI_GPU_CLOCK, "Current GPU clock frequencies", "GPU_CLOCK", true)
FLD_DESC_ENT(RDC_FI_MEM_CLOCK, "Current Memory clock frequencies", "MEM_CLOCK", true)
FLD_DESC_ENT(RDC_FI_MEMORY_TEMP, "Memory temperature in millidegrees Celsius", "MEMORY_TEMP", true)
FLD_DESC_ENT(RDC_FI_GPU_TEMP, "GPU temperature in millidegrees Celsius", "GPU_TEMP", true)
FLD_DESC_ENT(RDC_FI_POWER_USAGE, "Power usage in microwatts", "POWER_USAGE", true)
FLD_DESC_ENT(RDC_FI_PCIE_TX, "PCIe Tx utilization in bytes/second", "PCIE_TX", true)
FLD_DESC_ENT(RDC_FI_PCIE_RX, "PCIe Rx utilization in bytes/second", "PCIE_RX", true)
FLD_DESC_ENT(RDC_FI_GPU_UTIL, "GPU busy percentage", "GPU_UTIL", true)
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_USAGE, "Memory usage of the GPU instance in bytes", "GPU_MEMORY_USAGE", true)
// NOTE(review): unlike GPU_MEMORY_USAGE, this description names no unit -- confirm it is bytes.
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", "GPU_MEMORY_TOTAL", true)

// ECC error counters: the two accumulated totals first, then one pair per
// hardware block. As the descriptions spell out, a _SEC field is
// "Single Error Correction" and a _DED field is "Double Error Detection".
FLD_DESC_ENT(RDC_FI_ECC_CORRECT_TOTAL, "Accumulated Single Error Correction", "ECC_CORRECT", true)
FLD_DESC_ENT(RDC_FI_ECC_UNCORRECT_TOTAL, "Accumulated Double Error Detection", "ECC_UNCORRECT", true)
FLD_DESC_ENT(RDC_FI_ECC_SDMA_SEC, "SDMA Single Error Correction", "ECC_SDMA_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_SDMA_DED, "SDMA Double Error Detection", "ECC_SDMA_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_GFX_SEC, "GFX Single Error Correction", "ECC_GFX_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_GFX_DED, "GFX Double Error Detection", "ECC_GFX_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_MMHUB_SEC, "MMHUB Single Error Correction", "ECC_MMHUB_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_MMHUB_DED, "MMHUB Double Error Detection", "ECC_MMHUB_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_ATHUB_SEC, "ATHUB Single Error Correction", "ECC_ATHUB_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_ATHUB_DED, "ATHUB Double Error Detection", "ECC_ATHUB_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_BIF_SEC, "BIF Single Error Correction", "ECC_BIF_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_BIF_DED, "BIF Double Error Detection", "ECC_BIF_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_HDP_SEC, "HDP Single Error Correction", "ECC_HDP_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_HDP_DED, "HDP Double Error Detection", "ECC_HDP_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_SEC, "XGMI WAFL Single Error Correction", "ECC_XGMI_WAFL_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_DED, "XGMI WAFL Double Error Detection", "ECC_XGMI_WAFL_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_DF_SEC, "DF Single Error Correction", "ECC_DF_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_DF_DED, "DF Double Error Detection", "ECC_DF_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_SMN_SEC, "SMN Single Error Correction", "ECC_SMN_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_SMN_DED, "SMN Double Error Detection", "ECC_SMN_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_SEM_SEC, "SEM Single Error Correction", "ECC_SEM_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_SEM_DED, "SEM Double Error Detection", "ECC_SEM_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_MP0_SEC, "MP0 Single Error Correction", "ECC_MP0_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_MP0_DED, "MP0 Double Error Detection", "ECC_MP0_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_MP1_SEC, "MP1 Single Error Correction", "ECC_MP1_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_MP1_DED, "MP1 Double Error Detection", "ECC_MP1_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_FUSE_SEC, "FUSE Single Error Correction", "ECC_FUSE_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_FUSE_DED, "FUSE Double Error Detection", "ECC_FUSE_DED", true)
FLD_DESC_ENT(RDC_FI_ECC_UMC_SEC, "UMC Single Error Correction", "ECC_UMC_SEC", true)
FLD_DESC_ENT(RDC_FI_ECC_UMC_DED, "UMC Double Error Detection", "ECC_UMC_DED", true)

// ROCProfiler fields
// These fields don't map directly to rocprofiler counters.
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
// See metrics.xml in rocmtools
//FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "PROF_ELAPSED_COUNT", false)
//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "PROF_ACTIVE_WAVES", false)
//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "PROF_ACTIVE_CYCLES", false)
//FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "Active Waves / maximum Active Waves per CU", "PROF_CU_OCCUPANCY", false)
//FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "Active Cycles / total Elapsed Cycles", "PROF_CU_UTILIZATION", false)
//FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "kb fetched from video memory", "PROF_FETCH_SIZE", false)
//FLD_DESC_ENT(RDC_FI_PROF_WRITE_SIZE, "kb written to video memory", "PROF_WRITE_SIZE", false)
//FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "Number of fp16 OPS / second", "PROF_FLOPS_16", false)
//FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "Number of fp32 OPS / second", "PROF_FLOPS_32", false)
//FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "Number of fp64 OPS / second", "PROF_FLOPS_64", false)
//FLD_DESC_ENT(RDC_FI_PROF_GFLOPS_16, "Number of fp16 GOPS / second", "PROF_GFLOPS_16", false)
//FLD_DESC_ENT(RDC_FI_PROF_GFLOPS_32, "Number of fp32 GOPS / second", "PROF_GFLOPS_32", false)
//FLD_DESC_ENT(RDC_FI_PROF_GFLOPS_64, "Number of fp64 GOPS / second", "PROF_GFLOPS_64", false)
//FLD_DESC_ENT(RDC_FI_PROF_MEMR_BW_KBPNS, "HBM Read Bandwidth in kb/ns", "PROF_MEMR_BW_KBPNS", false)
//FLD_DESC_ENT(RDC_FI_PROF_MEMW_BW_KBPNS, "HBM Write Bandwidth in kb/ns", "PROF_MEMW_BW_KBPNS", false)

// Events
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_REQ_TX, "Outgoing requests to neighbor 0", "XGMI_REQ_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_RESP_TX, "Outgoing responses to neighbor 0", "XGMI_RES_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_BEATS_TX, "Data sent to neighbor 0 (32 byte pkts)", "XGMI_BTS_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_NOP_TX, "NOPs sent to neighbor 1", "XGMI_NOP_1", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_REQ_TX, "Outgoing requests to neighbor 1", "XGMI_REQ_1", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_RESP_TX, "Outgoing responses to neighbor 1", "XGMI_RES_1", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_BEATS_TX, "Data sent to neighbor 1 (32 byte pkts)", "XGMI_BTS_1", false)
|
|
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_THRPUT, "Tx throughput to XGMI neighbor 0 in b/s", "XGMI_0_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_THRPUT, "Tx throughput to XGMI neighbor 1 in b/s", "XGMI_1_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_2_THRPUT, "Tx throughput to XGMI neighbor 2 in b/s", "XGMI_2_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_3_THRPUT, "Tx throughput to XGMI neighbor 3 in b/s", "XGMI_3_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_4_THRPUT, "Tx throughput to XGMI neighbor 4 in b/s", "XGMI_4_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_5_THRPUT, "Tx throughput to XGMI neighbor 5 in b/s", "XGMI_5_T", true)
|
|
|
|
// Asynchronous event notifications
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_VMFAULT, "VM page fault", "VM_PAGE_FAULT", false)
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_THERMAL_THROTTLE, "Clk freq decrease due to temp", "THERMAL_THROT", false)
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_PRE_RESET, "GPU reset is about to occur", "GPU_PRE_RESET", false)
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_POST_RESET, "GPU reset just occurred", "GPU_POST_RESET", false)
|