[gfx12] Add support for gfx1200 (#131)

* [gfx12] Add support for gfx1200

* [gfx12] Rename CP_PERFMON_CNTL_1 to CP_PERFMON_CNTL for better compatibility
Tá an tiomantas seo le fáil i:
Ma, Bing
2025-06-25 13:28:51 -07:00
tiomanta ag GitHub
tuismitheoir 5bcad05345
tiomantas c872dcda53
D'athraigh 5 comhad le 62 breiseanna agus 22 scriosta
+19 -7
Féach ar an gComhad
@@ -97,15 +97,15 @@ enum SpmSeBlockId {
SPM_SE_BLOCK_NAME_LAST = SPM_SE_BLOCK_NAME_UTCL1,
};
namespace gfx1201 {
// IP versions for Radeon RX 9070
// ip_block : gc_12_0_1
namespace gfx1200 {
// ip_block : athub_4_1_0
// ip_block : gc_12_0_0
// ip_block : sdma_7_0_0
// Number of block instances
// Reference: global_features.h (from gfxip header file package)
static const uint32_t ChaCounterBlockNumInstances = 1;
static const uint32_t ChcCounterBlockNumInstances = 4;
static const uint32_t ChcCounterBlockNumInstances = 2;
static const uint32_t CpcCounterBlockNumInstances = 1;
static const uint32_t CpfCounterBlockNumInstances = 1;
static const uint32_t CpgCounterBlockNumInstances = 1;
@@ -113,12 +113,12 @@ static const uint32_t GcmcVmL2CounterBlockNumInstances = 1;
static const uint32_t GcrCounterBlockNumInstances = 1;
static const uint32_t Gcutcl2CounterBlockNumInstances = 1;
static const uint32_t Gcvml2CounterBlockNumInstances = 1;
static const uint32_t GcEaCpwdCounterBlockNumInstances = 36;
static const uint32_t GcEaSeCounterBlockNumInstances = 4;
static const uint32_t GcEaCpwdCounterBlockNumInstances = 18;
static const uint32_t GcEaSeCounterBlockNumInstances = 8;
static const uint32_t Gl1aCounterBlockNumInstances = 1;
static const uint32_t Gl1cCounterBlockNumInstances = 4;
static const uint32_t Gl2aCounterBlockNumInstances = 4;
static const uint32_t Gl2cCounterBlockNumInstances = 32;
static const uint32_t Gl2cCounterBlockNumInstances = 16;
static const uint32_t GrbmCounterBlockNumInstances = 1;
static const uint32_t GrbmhCounterBlockNumInstances = 1;
static const uint32_t RlcCounterBlockNumInstances = 1;
@@ -190,6 +190,18 @@ static const uint32_t TaCounterBlockMaxEvent = 254;
static const uint32_t TcpCounterBlockMaxEvent = 99;
static const uint32_t TdCounterBlockMaxEvent = 271;
static const uint32_t Utcl1CounterBlockMaxEvent = 71;
} // namespace gfx1200
namespace gfx1201 {
// ip_block : athub_4_1_0
// ip_block : gc_12_0_1
// ip_block : sdma_7_0_1
// Number of block instances
static const uint32_t ChcCounterBlockNumInstances = 4;
static const uint32_t GcEaCpwdCounterBlockNumInstances = 36;
static const uint32_t GcEaSeCounterBlockNumInstances = 4;
static const uint32_t Gl2cCounterBlockNumInstances = 32;
} // namespace gfx1201
static const uint32_t SdmaCounterBlockMaxInstances = 8;
+10 -2
Féach ar an gComhad
@@ -56,7 +56,7 @@
namespace gfxip {
namespace gfx12 {
namespace gfx1201 {
namespace gfx1200 {
// Counter register info - Auto-generated from chip_offset_byte.h, edit with extra caution
static const CounterRegInfo ChaCounterRegAddr[] = {REG_INFO_4(CHA)};
static const CounterRegInfo ChcCounterRegAddr[] = {REG_INFO_4(CHC)};
@@ -142,7 +142,15 @@ static const GpuBlockInfo SqcCounterBlockInfo = {"SQ", __BLOCK_ID_HSA(SQ), SqcCo
static const GpuBlockInfo TaCounterBlockInfo = {"TA", __BLOCK_ID_HSA(TA), TaCounterBlockNumInstances, TaCounterBlockMaxEvent, TaCounterBlockNumCounters, TaCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TaBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TA};
static const GpuBlockInfo TdCounterBlockInfo = {"TD", __BLOCK_ID_HSA(TD), TdCounterBlockNumInstances, TdCounterBlockMaxEvent, TdCounterBlockNumCounters, TdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TD};
static const GpuBlockInfo TcpCounterBlockInfo = {"TCP", __BLOCK_ID_HSA(TCP), TcpCounterBlockNumInstances, TcpCounterBlockMaxEvent, TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr|CounterBlockSaAttr|CounterBlockWgpAttr|CounterBlockTcAttr, NULL/*TdBlockDelayInfo*/, SPM_SE_BLOCK_NAME_TCP};
} // namespace gfx12xx
} // namespace gfx1200
namespace gfx1201 {
static const GpuBlockInfo Gl2cCounterBlockInfo = {"GL2C", __BLOCK_ID_HSA(GL2C), gfx1201::Gl2cCounterBlockNumInstances, Gl2cCounterBlockMaxEvent, Gl2cCounterBlockNumCounters, Gl2cCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
static const GpuBlockInfo ChcCounterBlockInfo = {"CHC", __BLOCK_ID(CHC), gfx1201::ChcCounterBlockNumInstances, ChcCounterBlockMaxEvent, ChcCounterBlockNumCounters, ChcCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr|CounterBlockTcAttr};
static const GpuBlockInfo GceaCounterBlockInfo = {"GCEA", __BLOCK_ID_HSA(GCEA), gfx1201::GcEaCpwdCounterBlockNumInstances, GcEaCpwdCounterBlockMaxEvent, GcEaCpwdCounterBlockNumCounters, GcEaCpwdCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockDfltAttr};
static const GpuBlockInfo GceaSeCounterBlockInfo = {"GCEA_SE", __BLOCK_ID(GCEA_SE), gfx1201::GcEaSeCounterBlockNumInstances, GcEaSeCounterBlockMaxEvent, GcEaSeCounterBlockNumCounters, GcEaSeCounterRegAddr, gfx12_cntx_prim::select_value, CounterBlockSeAttr};
} // namespace gfx1201
} // namespace gfx12
} // namespace gfxip
+12 -12
Féach ar an gComhad
@@ -44,7 +44,7 @@ class gfx12_cntx_prim {
REG_32B_ADDR(GC, 0, regCOMPUTE_PERFCOUNT_ENABLE);
static constexpr Register RLC_PERFMON_CLK_CNTL_ADDR =
REG_32B_ADDR(GC, 0, regRLC_PERFMON_CNTL); // REG_32B_ADDR(GC, 0, regRLC_PERFMON_CLK_CNTL);
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL_1);
static constexpr Register CP_PERFMON_CNTL_ADDR = REG_32B_ADDR(GC, 0, regCP_PERFMON_CNTL);
static constexpr Register COMPUTE_THREAD_TRACE_ENABLE_ADDR =
REG_32B_ADDR(GC, 0, regCOMPUTE_THREAD_TRACE_ENABLE);
@@ -241,29 +241,29 @@ class gfx12_cntx_prim {
return grbm_gfx_index;
}
// CP_PERFMON_CNTL_1 value to reset counters
// CP_PERFMON_CNTL value to reset counters
// Reset value: the register word with every bit cleared.
static uint32_t cp_perfmon_cntl_reset_value() {
  constexpr uint32_t kAllBitsCleared = 0;
  return kAllBitsCleared;
}
// CP_PERFMON_CNTL_1 value to start counters
// CP_PERFMON_CNTL value to start counters
static uint32_t cp_perfmon_cntl_start_value() {
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1);
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1);
return cp_perfmon_cntl;
}
// CP_PERFMON_CNTL_1 value to stop/freeze counters
// CP_PERFMON_CNTL value to stop/freeze counters
static uint32_t cp_perfmon_cntl_stop_value() {
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 2) |
SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1);
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 2) |
SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
return cp_perfmon_cntl;
}
// CP_PERFMON_CNTL_1 value to stop/freeze counters
// CP_PERFMON_CNTL value to read (sample) counters: PERFMON_STATE 1 with PERFMON_SAMPLE_ENABLE set
static uint32_t cp_perfmon_cntl_read_value() {
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_STATE, 1) |
SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, PERFMON_SAMPLE_ENABLE, 1);
uint32_t cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_STATE, 1) |
SET_REG_FIELD_BITS(CP_PERFMON_CNTL, PERFMON_SAMPLE_ENABLE, 1);
return cp_perfmon_cntl;
}
@@ -421,12 +421,12 @@ class gfx12_cntx_prim {
}
static uint32_t cp_perfmon_cntl_spm_start_value() {
uint32_t cp_perfmon_cntl{0};
cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 1);
cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 1);
return cp_perfmon_cntl;
}
static uint32_t cp_perfmon_cntl_spm_stop_value() {
uint32_t cp_perfmon_cntl{0};
cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL_1, SPM_PERFMON_STATE, 2);
cp_perfmon_cntl = SET_REG_FIELD_BITS(CP_PERFMON_CNTL, SPM_PERFMON_STATE, 2);
return cp_perfmon_cntl;
}
static uint32_t rlc_spm_muxsel_data(const uint32_t& value, const counter_des_t& counter_des,
+8
Féach ar an gComhad
@@ -78,6 +78,7 @@ void Gfx12Factory::ConstructBuilders(const AgentInfo* agent_info) {
}
void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) {
auto agent_name = std::string_view(agent_info->name).substr(0, 7);
// Global blocks
block_table_[__BLOCK_ID(CHA)] = &ChaCounterBlockInfo;
block_table_[__BLOCK_ID(CHC)] = &ChcCounterBlockInfo;
@@ -108,6 +109,13 @@ void Gfx12Factory::ConstructTable(const AgentInfo* agent_info) {
block_table_[__BLOCK_ID_HSA(TA)] = &TaCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(TCP)] = &TcpCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(TD)] = &TdCounterBlockInfo;
if (agent_name == "gfx1201") {
block_table_[__BLOCK_ID(CHC)] = &gfx1201::ChcCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GCEA)] = &gfx1201::GceaCounterBlockInfo;
block_table_[__BLOCK_ID(GCEA_SE)] = &gfx1201::GceaSeCounterBlockInfo;
block_table_[__BLOCK_ID_HSA(GL2C)] = &gfx1201::Gl2cCounterBlockInfo;
}
}
// Pm4Factory create methods
+13 -1
Féach ar an gComhad
@@ -28,10 +28,22 @@
#include "util/reg_offsets.h"
#include "linux/registers/gc/gc_12_0_0_offset.h"
#include "linux/registers/gc/gc_12_0_0_sh_mask.h"
// Alias CP_PERFMON_CNTL_1 under the name CP_PERFMON_CNTL for better compatibility.
// The aliases cover the register offset, its base index, and the shift and mask of the
// PERFMON_STATE, SPM_PERFMON_STATE, PERFMON_ENABLE_MODE and PERFMON_SAMPLE_ENABLE fields.
// NOTE(review): the CP_PERFMON_CNTL_1 names are presumably supplied by the gc_12_0_0
// offset/sh_mask headers included before this point - confirm.
#define regCP_PERFMON_CNTL_BASE_IDX regCP_PERFMON_CNTL_1_BASE_IDX
#define regCP_PERFMON_CNTL regCP_PERFMON_CNTL_1
#define CP_PERFMON_CNTL__PERFMON_STATE__SHIFT CP_PERFMON_CNTL_1__PERFMON_STATE__SHIFT
#define CP_PERFMON_CNTL__SPM_PERFMON_STATE__SHIFT CP_PERFMON_CNTL_1__SPM_PERFMON_STATE__SHIFT
#define CP_PERFMON_CNTL__PERFMON_ENABLE_MODE__SHIFT CP_PERFMON_CNTL_1__PERFMON_ENABLE_MODE__SHIFT
#define CP_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE__SHIFT CP_PERFMON_CNTL_1__PERFMON_SAMPLE_ENABLE__SHIFT
#define CP_PERFMON_CNTL__PERFMON_STATE_MASK CP_PERFMON_CNTL_1__PERFMON_STATE_MASK
#define CP_PERFMON_CNTL__SPM_PERFMON_STATE_MASK CP_PERFMON_CNTL_1__SPM_PERFMON_STATE_MASK
#define CP_PERFMON_CNTL__PERFMON_ENABLE_MODE_MASK CP_PERFMON_CNTL_1__PERFMON_ENABLE_MODE_MASK
#define CP_PERFMON_CNTL__PERFMON_SAMPLE_ENABLE_MASK CP_PERFMON_CNTL_1__PERFMON_SAMPLE_ENABLE_MASK
#include "linux/packets/nvd.h"
#include "gfxip/gfx12/gfx12_block_info.h"
using namespace gfxip::gfx12;
using namespace gfxip::gfx12::gfx1201;
using namespace gfxip::gfx12::gfx1200;
#include "gfxip/gfx12/gfx12_primitives.h"
#include "gfxip/gfx12/gfx12_block_table.h"