83cf97e280
Change-Id: Iea3938df9407789c061c3a6ead9167a69069d6e6
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
[ROCm/rdc commit: c3a4c899d5]
137 řádky
11 KiB
Plaintext
137 řádky
11 KiB
Plaintext
/*
|
|
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
|
|
// Description Fields:
|
|
// Arg # Type Meaning
|
|
// -------------------------------------------------
|
|
// 1 rdc_field_t enum of field
|
|
// 2 string description of enum
|
|
// 3 string rdci display label
|
|
// 4 bool whether to display the field in rdci
|
|
// rdc_field_t Description rdci label To Display
|
|
// =========== =========== ========= ==========
|
|
// Provide a no-op default for FLD_DESC_ENT: when the macro is not already
// defined, every FLD_DESC_ENT(...) row below expands to nothing.
// NOTE(review): presumably consumers define FLD_DESC_ENT before including this
// table to generate code from each row -- confirm against the including file.
#ifndef FLD_DESC_ENT
|
|
#define FLD_DESC_ENT(ID, DESC, LABEL, DISPLAY)
|
|
#endif
|
|
|
|
// General device rows: GPU count, device name, clocks, temperatures, power,
// PCIe traffic, utilization and memory. Every row except RDC_FI_INVALID has
// its display flag set to true.
FLD_DESC_ENT(RDC_FI_INVALID, "Unknown/Invalid field", "INVALID", false)
|
|
FLD_DESC_ENT(RDC_FI_GPU_COUNT, "GPU count in the system", "GPU_COUNT", true)
|
|
FLD_DESC_ENT(RDC_FI_DEV_NAME, "Name of the device", "DEV_NAME", true)
|
|
FLD_DESC_ENT(RDC_FI_GPU_CLOCK, "Current GPU clock frequencies", "GPU_CLOCK", true)
|
|
FLD_DESC_ENT(RDC_FI_MEM_CLOCK, "Current Memory clock frequencies", "MEM_CLOCK", true)
|
|
FLD_DESC_ENT(RDC_FI_MEMORY_TEMP, "Memory temperature in millidegrees Celsius", "MEMORY_TEMP", true)
|
|
FLD_DESC_ENT(RDC_FI_GPU_TEMP, "GPU temperature in millidegrees Celsius", "GPU_TEMP", true)
|
|
FLD_DESC_ENT(RDC_FI_POWER_USAGE, "Power usage in microwatts", "POWER_USAGE", true)
|
|
FLD_DESC_ENT(RDC_FI_PCIE_TX, "PCIe Tx utilization in bytes/second", "PCIE_TX", true)
|
|
FLD_DESC_ENT(RDC_FI_PCIE_RX, "PCIe Rx utilization in bytes/second", "PCIE_RX", true)
|
|
FLD_DESC_ENT(RDC_FI_PCIE_BANDWIDTH, "PCIe bandwidth in GB/sec", "PCIE_BANDWIDTH", true)
|
|
|
|
FLD_DESC_ENT(RDC_FI_GPU_UTIL, "GPU busy percentage", "GPU_UTIL", true)
|
|
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_USAGE, "Memory usage of the GPU instance in bytes", "GPU_MEMORY_USAGE", true)
|
|
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", "GPU_MEMORY_TOTAL", true)
|
|
// ECC error rows. RDC_FI_ECC_CORRECT_TOTAL and RDC_FI_ECC_UNCORRECT_TOTAL are
// the accumulated totals; the remaining rows come in pairs per prefix (SDMA,
// GFX, MMHUB, ...) where, per the description strings, the _SEC suffix stands
// for "Single Error Correction" and _DED for "Double Error Detection".
// Every row in this group has its display flag set to true.
FLD_DESC_ENT(RDC_FI_ECC_CORRECT_TOTAL, "Accumulated Single Error Correction", "ECC_CORRECT", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_UNCORRECT_TOTAL, "Accumulated Double Error Detection", "ECC_UNCORRECT", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_SDMA_SEC, "SDMA Single Error Correction", "ECC_SDMA_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_SDMA_DED, "SDMA Double Error Detection", "ECC_SDMA_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_GFX_SEC, "GFX Single Error Correction", "ECC_GFX_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_GFX_DED, "GFX Double Error Detection", "ECC_GFX_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_MMHUB_SEC, "MMHUB Single Error Correction", "ECC_MMHUB_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_MMHUB_DED, "MMHUB Double Error Detection", "ECC_MMHUB_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_ATHUB_SEC, "ATHUB Single Error Correction", "ECC_ATHUB_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_ATHUB_DED, "ATHUB Double Error Detection", "ECC_ATHUB_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_BIF_SEC, "BIF Single Error Correction", "ECC_BIF_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_BIF_DED, "BIF Double Error Detection", "ECC_BIF_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_HDP_SEC, "HDP Single Error Correction", "ECC_HDP_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_HDP_DED, "HDP Double Error Detection", "ECC_HDP_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_SEC, "XGMI WAFL Single Error Correction", "ECC_XGMI_WAFL_SEC",true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_XGMI_WAFL_DED, "XGMI WAFL Double Error Detection", "ECC_XGMI_WAFL_DED",true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_DF_SEC, "DF Single Error Correction", "ECC_DF_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_DF_DED, "DF Double Error Detection", "ECC_DF_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_SMN_SEC, "SMN Single Error Correction", "ECC_SMN_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_SMN_DED, "SMN Double Error Detection", "ECC_SMN_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_SEM_SEC, "SEM Single Error Correction", "ECC_SEM_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_SEM_DED, "SEM Double Error Detection", "ECC_SEM_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_MP0_SEC, "MP0 Single Error Correction", "ECC_MP0_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_MP0_DED, "MP0 Double Error Detection", "ECC_MP0_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_MP1_SEC, "MP1 Single Error Correction", "ECC_MP1_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_MP1_DED, "MP1 Double Error Detection", "ECC_MP1_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_FUSE_SEC, "FUSE Single Error Correction", "ECC_FUSE_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_FUSE_DED, "FUSE Double Error Detection", "ECC_FUSE_DED", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_UMC_SEC, "UMC Single Error Correction", "ECC_UMC_SEC", true)
|
|
FLD_DESC_ENT(RDC_FI_ECC_UMC_DED, "UMC Double Error Detection", "ECC_UMC_DED", true)
|
|
// XGMI accumulated data sizes in KB: one read row and one write row for each
// of the indices 0 through 7. Every row has its display flag set to true.
FLD_DESC_ENT(RDC_FI_XGMI_0_READ_KB, "XGMI0 accumulated data read size (KB)", "XGMI_0_READ", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_1_READ_KB, "XGMI1 accumulated data read size (KB)", "XGMI_1_READ", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_2_READ_KB, "XGMI2 accumulated data read size (KB)", "XGMI_2_READ", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_3_READ_KB, "XGMI3 accumulated data read size (KB)", "XGMI_3_READ", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_4_READ_KB, "XGMI4 accumulated data read size (KB)", "XGMI_4_READ", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_5_READ_KB, "XGMI5 accumulated data read size (KB)", "XGMI_5_READ", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_6_READ_KB, "XGMI6 accumulated data read size (KB)", "XGMI_6_READ", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_7_READ_KB, "XGMI7 accumulated data read size (KB)", "XGMI_7_READ", true)
|
|
|
|
FLD_DESC_ENT(RDC_FI_XGMI_0_WRITE_KB, "XGMI0 accumulated data write size (KB)", "XGMI_0_WRITE", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_1_WRITE_KB, "XGMI1 accumulated data write size (KB)", "XGMI_1_WRITE", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_2_WRITE_KB, "XGMI2 accumulated data write size (KB)", "XGMI_2_WRITE", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_3_WRITE_KB, "XGMI3 accumulated data write size (KB)", "XGMI_3_WRITE", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_4_WRITE_KB, "XGMI4 accumulated data write size (KB)", "XGMI_4_WRITE", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_5_WRITE_KB, "XGMI5 accumulated data write size (KB)", "XGMI_5_WRITE", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_6_WRITE_KB, "XGMI6 accumulated data write size (KB)", "XGMI_6_WRITE", true)
|
|
FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)", "XGMI_7_WRITE", true)
|
|
|
|
|
|
|
|
// ROCProfiler fields
|
|
// These fields don't map directly to rocprofiler counters
|
|
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
|
|
// See metrics.xml in rocprofiler
|
|
// NOTE(review): every RDC_FI_PROF_* row in this group passes an empty string
// as DESC and false as DISPLAY -- TODO: supply descriptions if these rows are
// ever meant to be shown.
FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "", "CU_UTILIZATION", false)
|
|
FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "", "CU_OCCUPANCY", false)
|
|
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_16, "", "FLOPS_16", false)
|
|
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_32, "", "FLOPS_32", false)
|
|
FLD_DESC_ENT(RDC_FI_PROF_FLOPS_64, "", "FLOPS_64", false)
|
|
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "", "ACTIVE_CYCLES", false)
|
|
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "", "ACTIVE_WAVES", false)
|
|
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "", "ELAPSED_CYCLES", false)
|
|
|
|
// Events
|
|
// Per-neighbor XGMI event rows: NOP, request, response and beat rows for
// neighbors 0 and 1 (display flag false), followed by Tx throughput rows for
// neighbors 0 through 5 (display flag true).
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_REQ_TX, "Outgoing requests to neighbor 0", "XGMI_REQ_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_RESP_TX, "Outgoing responses to neighbor 0", "XGMI_RES_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_BEATS_TX, "Data sent to neighbor 0 (32 byte pkts)", "XGMI_BTS_0", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_NOP_TX, "NOPs sent to neighbor 1", "XGMI_NOP_1", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_REQ_TX, "Outgoing requests to neighbor 1", "XGMI_REQ_1", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_RESP_TX, "Outgoing responses to neighbor 1", "XGMI_RES_1", false)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_BEATS_TX, "Data sent to neighbor 1 (32 byte pkts)", "XGMI_BTS_1", false)
|
|
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_0_THRPUT, "Tx throughput to XGMI neighbor 0 in b/s", "XGMI_0_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_1_THRPUT, "Tx throughput to XGMI neighbor 1 in b/s", "XGMI_1_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_2_THRPUT, "Tx throughput to XGMI neighbor 2 in b/s", "XGMI_2_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_3_THRPUT, "Tx throughput to XGMI neighbor 3 in b/s", "XGMI_3_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_4_THRPUT, "Tx throughput to XGMI neighbor 4 in b/s", "XGMI_4_T", true)
|
|
FLD_DESC_ENT(RDC_EVNT_XGMI_5_THRPUT, "Tx throughput to XGMI neighbor 5 in b/s", "XGMI_5_T", true)
|
|
|
|
// Asynchronous event notifications
|
|
// Notification rows: VM page fault, thermal throttle, GPU pre/post reset and
// ring hang. Every row in this group has its display flag set to false.
FLD_DESC_ENT(RDC_EVNT_NOTIF_VMFAULT, "VM page fault", "VM_PAGE_FAULT", false)
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_THERMAL_THROTTLE, "Clk freq decrease due to temp", "THERMAL_THROT", false)
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_PRE_RESET, "GPU reset is about to occur", "GPU_PRE_RESET", false)
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_POST_RESET, "GPU reset just occurred", "GPU_POST_RESET", false)
|
|
FLD_DESC_ENT(RDC_EVNT_NOTIF_RING_HANG, "GPU ring hang just occurred", "RING_HANG", false)
|