[SWDEV-517396] Align rdc_field with rdc_bootstrap

Signed-off-by: adapryor <Adam.pryor@amd.com>
Change-Id: I5e05e25c5980a3141665ae2d13a6ae09207ccb41


[ROCm/rdc commit: 9571dad23d]
Αυτή η υποβολή περιλαμβάνεται σε:
adapryor
2025-02-26 09:49:14 -06:00
υποβλήθηκε από Galantsev, Dmitrii
γονέας 705d42f0f5
υποβολή fbeacaff0c
4 αρχεία άλλαξαν με 85 προσθήκες και 62 διαγραφές
@@ -110,7 +110,6 @@ FLD_DESC_ENT(RDC_FI_XGMI_4_READ_KB, "XGMI4 accumulated data read size (KB)"
FLD_DESC_ENT(RDC_FI_XGMI_5_READ_KB, "XGMI5 accumulated data read size (KB)", "XGMI_5_READ", true)
FLD_DESC_ENT(RDC_FI_XGMI_6_READ_KB, "XGMI6 accumulated data read size (KB)", "XGMI_6_READ", true)
FLD_DESC_ENT(RDC_FI_XGMI_7_READ_KB, "XGMI7 accumulated data read size (KB)", "XGMI_7_READ", true)
FLD_DESC_ENT(RDC_FI_XGMI_TOTAL_READ_KB, "XGMI accumlated data read size across all lanes (KB)", "XGMI_TOTAL_READ", true)
FLD_DESC_ENT(RDC_FI_XGMI_0_WRITE_KB, "XGMI0 accumulated data write size (KB)", "XGMI_0_WRITE", true)
FLD_DESC_ENT(RDC_FI_XGMI_1_WRITE_KB, "XGMI1 accumulated data write size (KB)", "XGMI_1_WRITE", true)
@@ -120,6 +119,7 @@ FLD_DESC_ENT(RDC_FI_XGMI_4_WRITE_KB, "XGMI4 accumulated data write size (KB)
FLD_DESC_ENT(RDC_FI_XGMI_5_WRITE_KB, "XGMI5 accumulated data write size (KB)", "XGMI_5_WRITE", true)
FLD_DESC_ENT(RDC_FI_XGMI_6_WRITE_KB, "XGMI6 accumulated data write size (KB)", "XGMI_6_WRITE", true)
FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)", "XGMI_7_WRITE", true)
FLD_DESC_ENT(RDC_FI_XGMI_TOTAL_READ_KB, "XGMI accumlated data read size across all lanes (KB)", "XGMI_TOTAL_READ", true)
FLD_DESC_ENT(RDC_FI_XGMI_TOTAL_WRITE_KB, "XGMI accumlated data write size across all lanes (KB)", "XGMI_TOTAL_WRITE", true)
@@ -539,37 +539,46 @@ The ``dmon`` command monitors GPU index 0, field 600, and 601, where 600 is for
% rdci dmon -l
... ...
600 RDC_FI_ECC_CORRECT_TOTAL : Accumulated Single Error Correction.
601 RDC_FI_ECC_UNCORRECT_TOTAL : Accumulated Double Error Detection.
602 RDC_FI_ECC_SDMA_SEC : SDMA Single Error Correction.
603 RDC_FI_ECC_SDMA_DED : SDMA Double Error Detection.
604 RDC_FI_ECC_GFX_SEC : GFX Single Error Correction.
605 RDC_FI_ECC_GFX_DED : GFX Double Error Detection.
606 RDC_FI_ECC_MMHUB_SEC : MMHUB Single Error Correction.
607 RDC_FI_ECC_MMHUB_DED : MMHUB Double Error Detection.
608 RDC_FI_ECC_ATHUB_SEC : ATHUB Single Error Correction.
609 RDC_FI_ECC_ATHUB_DED : ATHUB Double Error Detection.
610 RDC_FI_ECC_BIF_SEC : BIF Single Error Correction.
611 RDC_FI_ECC_BIF_DED : BIF Double Error Detection.
612 RDC_FI_ECC_HDP_SEC : HDP Single Error Correction.
613 RDC_FI_ECC_HDP_DED : HDP Double Error Detection.
614 RDC_FI_ECC_XGMI_WAFL_SEC : XGMI WAFL Single Error Correction.
615 RDC_FI_ECC_XGMI_WAFL_DED : XGMI WAFL Double Error Detection.
616 RDC_FI_ECC_DF_SEC : DF Single Error Correction.
617 RDC_FI_ECC_DF_DED : DF Double Error Detection.
618 RDC_FI_ECC_SMN_SEC : SMN Single Error Correction.
619 RDC_FI_ECC_SMN_DED : SMN Double Error Detection.
620 RDC_FI_ECC_SEM_SEC : SEM Single Error Correction.
621 RDC_FI_ECC_SEM_DED : SEM Double Error Detection.
622 RDC_FI_ECC_MP0_SEC : MP0 Single Error Correction.
623 RDC_FI_ECC_MP0_DED : MP0 Double Error Detection.
624 RDC_FI_ECC_MP1_SEC : MP1 Single Error Correction.
625 RDC_FI_ECC_MP1_DED : MP1 Double Error Detection.
626 RDC_FI_ECC_FUSE_SEC : FUSE Single Error Correction.
627 RDC_FI_ECC_FUSE_DED : FUSE Double Error Detection.
628 RDC_FI_ECC_UMC_SEC : UMC Single Error Correction.
629 RDC_FI_ECC_UMC_DED : UMC Double Error Detection.
600 RDC_FI_ECC_CORRECT_TOTAL : Accumulated Single Error Correction
601 RDC_FI_ECC_UNCORRECT_TOTAL : Accumulated Double Error Detection
602 RDC_FI_ECC_SDMA_CE : SDMA Correctable Error
603 RDC_FI_ECC_SDMA_UE : SDMA Uncorrectable Error
604 RDC_FI_ECC_GFX_CE : GFX Correctable Error
605 RDC_FI_ECC_GFX_UE : GFX Uncorrectable Error
606 RDC_FI_ECC_MMHUB_CE : MMHUB Correctable Error
607 RDC_FI_ECC_MMHUB_UE : MMHUB Uncorrectable Error
608 RDC_FI_ECC_ATHUB_CE : ATHUB Correctable Error
609 RDC_FI_ECC_ATHUB_UE : ATHUB Uncorrectable Error
610 RDC_FI_ECC_PCIE_BIF_CE : PCIE_BIF Correctable Error
611 RDC_FI_ECC_PCIE_BIF_UE : PCIE_BIF Uncorrectable Error
612 RDC_FI_ECC_HDP_CE : HDP Correctable Error
613 RDC_FI_ECC_HDP_UE : HDP Uncorrectable Error
614 RDC_FI_ECC_XGMI_WAFL_CE : XGMI WAFL Correctable Error
615 RDC_FI_ECC_XGMI_WAFL_UE : XGMI WAFL Uncorrectable Error
616 RDC_FI_ECC_DF_CE : DF Correctable Error
617 RDC_FI_ECC_DF_UE : DF Uncorrectable Error
618 RDC_FI_ECC_SMN_CE : SMN Correctable Error
619 RDC_FI_ECC_SMN_UE : SMN Uncorrectable Error
620 RDC_FI_ECC_SEM_CE : SEM Correctable Error
621 RDC_FI_ECC_SEM_UE : SEM Uncorrectable Error
622 RDC_FI_ECC_MP0_CE : MP0 Correctable Error
623 RDC_FI_ECC_MP0_UE : MP0 Uncorrectable Error
624 RDC_FI_ECC_MP1_CE : MP1 Correctable Error
625 RDC_FI_ECC_MP1_UE : MP1 Uncorrectable Error
626 RDC_FI_ECC_FUSE_CE : FUSE Correctable Error
627 RDC_FI_ECC_FUSE_UE : FUSE Uncorrectable Error
628 RDC_FI_ECC_UMC_CE : UMC Correctable Error
629 RDC_FI_ECC_UMC_UE : UMC Uncorrectable Error
630 RDC_FI_ECC_MCA_CE : MCA Correctable Error
631 RDC_FI_ECC_MCA_UE : MCA Uncorrectable Error
632 RDC_FI_ECC_VCN_CE : VCN Correctable Error
633 RDC_FI_ECC_VCN_UE : VCN Uncorrectable Error
634 RDC_FI_ECC_JPEG_CE : JPEG Correctable Error
635 RDC_FI_ECC_JPEG_UE : JPEG Uncorrectable Error
636 RDC_FI_ECC_IH_CE : IH Correctable Error
637 RDC_FI_ECC_IH_UE : IH Uncorrectable Error
638 RDC_FI_ECC_MPIO_CE : MPIO Correctable Error
639 RDC_FI_ECC_MPIO_UE : MPIO Uncorrectable Error
... ...
@@ -128,8 +128,7 @@ int run() {
field_ids.push_back(RDC_FI_GPU_MEMORY_USAGE);
field_ids.push_back(RDC_FI_POWER_USAGE);
// profiler metrics
field_ids.push_back(RDC_FI_PROF_MEAN_OCC_PER_CU);
field_ids.push_back(RDC_FI_PROF_MEAN_OCC_PER_ACTIVE_CU);
field_ids.push_back(RDC_FI_PROF_OCCUPANCY_PERCENT);
field_ids.push_back(RDC_FI_PROF_ACTIVE_CYCLES);
field_ids.push_back(RDC_FI_PROF_ACTIVE_WAVES);
field_ids.push_back(RDC_FI_PROF_ELAPSED_CYCLES);
@@ -93,36 +93,47 @@ class rdc_field_t(c_int):
RDC_FI_GPU_MEMORY_ACTIVITY = 505
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH = 506
RDC_FI_GPU_MEMORY_CUR_BANDWIDTH = 507
RDC_FI_GPU_PAGE_RETRIED = 550
RDC_FI_ECC_CORRECT_TOTAL = 600
RDC_FI_ECC_UNCORRECT_TOTAL = 601
RDC_FI_ECC_SDMA_SEC = 602
RDC_FI_ECC_SDMA_DED = 603
RDC_FI_ECC_GFX_SEC = 604
RDC_FI_ECC_GFX_DED = 605
RDC_FI_ECC_MMHUB_SEC = 606
RDC_FI_ECC_MMHUB_DED = 607
RDC_FI_ECC_ATHUB_SEC = 608
RDC_FI_ECC_ATHUB_DED = 609
RDC_FI_ECC_BIF_SEC = 610
RDC_FI_ECC_BIF_DED = 611
RDC_FI_ECC_HDP_SEC = 612
RDC_FI_ECC_HDP_DED = 613
RDC_FI_ECC_XGMI_WAFL_SEC = 614
RDC_FI_ECC_XGMI_WAFL_DED = 615
RDC_FI_ECC_DF_SEC = 616
RDC_FI_ECC_DF_DED = 617
RDC_FI_ECC_SMN_SEC = 618
RDC_FI_ECC_SMN_DED = 619
RDC_FI_ECC_SEM_SEC = 620
RDC_FI_ECC_SEM_DED = 621
RDC_FI_ECC_MP0_SEC = 622
RDC_FI_ECC_MP0_DED = 623
RDC_FI_ECC_MP1_SEC = 624
RDC_FI_ECC_MP1_DED = 625
RDC_FI_ECC_FUSE_SEC = 626
RDC_FI_ECC_FUSE_DED = 627
RDC_FI_ECC_UMC_SEC = 628
RDC_FI_ECC_UMC_DED = 629
RDC_FI_ECC_SDMA_CE = 602
RDC_FI_ECC_SDMA_UE = 603
RDC_FI_ECC_GFX_CE = 604
RDC_FI_ECC_GFX_UE = 605
RDC_FI_ECC_MMHUB_CE = 606
RDC_FI_ECC_MMHUB_UE = 607
RDC_FI_ECC_ATHUB_CE = 608
RDC_FI_ECC_ATHUB_UE = 609
RDC_FI_ECC_PCIE_BIF_CE = 610
RDC_FI_ECC_PCIE_BIF_UE = 611
RDC_FI_ECC_HDP_CE = 612
RDC_FI_ECC_HDP_UE = 613
RDC_FI_ECC_XGMI_WAFL_CE = 614
RDC_FI_ECC_XGMI_WAFL_UE = 615
RDC_FI_ECC_DF_CE = 616
RDC_FI_ECC_DF_UE = 617
RDC_FI_ECC_SMN_CE = 618
RDC_FI_ECC_SMN_UE = 619
RDC_FI_ECC_SEM_CE = 620
RDC_FI_ECC_SEM_UE = 621
RDC_FI_ECC_MP0_CE = 622
RDC_FI_ECC_MP0_UE = 623
RDC_FI_ECC_MP1_CE = 624
RDC_FI_ECC_MP1_UE = 625
RDC_FI_ECC_FUSE_CE = 626
RDC_FI_ECC_FUSE_UE = 627
RDC_FI_ECC_UMC_CE = 628
RDC_FI_ECC_UMC_UE = 629
RDC_FI_ECC_MCA_CE = 630
RDC_FI_ECC_MCA_UE = 631
RDC_FI_ECC_VCN_CE = 632
RDC_FI_ECC_VCN_UE = 633
RDC_FI_ECC_JPEG_CE = 634
RDC_FI_ECC_JPEG_UE = 635
RDC_FI_ECC_IH_CE = 636
RDC_FI_ECC_IH_UE = 637
RDC_FI_ECC_MPIO_CE = 638
RDC_FI_ECC_MPIO_UE = 639
RDC_FI_XGMI_0_READ_KB = 700
RDC_FI_XGMI_1_READ_KB = 701
RDC_FI_XGMI_2_READ_KB = 702
@@ -169,6 +180,10 @@ class rdc_field_t(c_int):
RDC_EVNT_XGMI_1_BEATS_TX = 1007
RDC_EVNT_XGMI_0_THRPUT = 1500
RDC_EVNT_XGMI_1_THRPUT = 1501
RDC_EVNT_XGMI_2_THRPUT = 1502
RDC_EVNT_XGMI_3_THRPUT = 1503
RDC_EVNT_XGMI_4_THRPUT = 1504
RDC_EVNT_XGMI_5_THRPUT = 1505
RDC_EVNT_NOTIF_VMFAULT = 2000
RDC_EVNT_NOTIF_THERMAL_THROTTLE = 2001
RDC_EVNT_NOTIF_PRE_RESET = 2002