Merge commit '8f3a2326136caefa935876155eddb61177ad362c' into develop

This commit is contained in:
systems-assistant[bot]
2025-07-30 15:21:33 +00:00
9 fájl változott, egészen pontosan 86 új sor hozzáadva és 69 régi sor törölve
@@ -51,6 +51,7 @@ if(${NAME}_FOUND AND NOT TARGET ${NAME}::${NAME})
find_package(hipblaslt REQUIRED)
find_package(hsakmt REQUIRED)
find_package(hip REQUIRED)
find_package(hiprand REQUIRED)
find_package(hsa-runtime64 REQUIRED)
find_package(amd_smi REQUIRED)
target_link_libraries(
@@ -62,6 +63,7 @@ if(${NAME}_FOUND AND NOT TARGET ${NAME}::${NAME})
roc::hipblaslt
hsakmt::hsakmt
hip::amdhip64
hip::hiprand
hsa-runtime64::hsa-runtime64
amd_smi
)
@@ -195,7 +195,6 @@ FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "", "CPF_CPF_TCIU_I
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_STALL, "", "CPF_CPF_TCIU_STALL", false)
// Misc
FLD_DESC_ENT(RDC_FI_PROF_SIMD_UTILIZATION, "Fraction of time the SIMDs are being utilized", "SIMD_UTILIZATION", false)
FLD_DESC_ENT(RDC_FI_PROF_UUID, "UUID from rocprofiler", "PROF_UUID", true)
FLD_DESC_ENT(RDC_FI_PROF_KFD_ID, "GPU_ID from rocprofiler, same as KFD_ID", "PROF_KFD_ID", true)
// Events
-1
Fájl megtekintése
@@ -345,7 +345,6 @@ typedef enum {
RDC_FI_PROF_CPF_CPF_TCIU_IDLE,
RDC_FI_PROF_CPF_CPF_TCIU_STALL,
RDC_FI_PROF_SIMD_UTILIZATION,
RDC_FI_PROF_UUID,
RDC_FI_PROF_KFD_ID,
/**
@@ -64,6 +64,7 @@ class RdcConfigSettingsImpl : public RdcConfigSettings {
void monitorSettings();
uint64_t wattsToMicrowatts(uint64_t watts) const;
uint64_t microwattsToWatts(int microwatts) const;
uint64_t mHzToHz(uint64_t mhz) const;
rdc_status_t get_group_info(rdc_gpu_group_t group_id, rdc_group_info_t* rdc_group_info);
};
@@ -79,7 +79,7 @@ class CounterSampler {
size_t get_counter_size(rocprofiler_counter_id_t counter);
// Get the dimensions of a counter
std::vector<rocprofiler_record_dimension_info_t> get_counter_dimensions(
std::vector<rocprofiler_counter_record_dimension_info_t> get_counter_dimensions(
rocprofiler_counter_id_t counter);
static std::vector<std::shared_ptr<CounterSampler>> samplers_;
@@ -44,7 +44,7 @@ void RdcConfigSettingsImpl::monitorSettings() {
rdc_status_t rdc_status;
rdc_group_info_t rdc_group_info = {};
amdsmi_power_cap_info_t cap_info = {};
amdsmi_frequencies_t freqs = {};
amdsmi_clk_info_t info = {};
uint64_t cached_value;
while (true) {
@@ -102,18 +102,18 @@ void RdcConfigSettingsImpl::monitorSettings() {
}
// Mem clock
status = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_MEM, &freqs);
status = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_MEM, &info);
if (status != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(
RDC_ERROR,
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq failed: " << status);
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq for mem failed: "
<< status);
continue;
}
auto mem_clk_it = cached_settings.find(RDC_CFG_MEMORY_CLOCK_LIMIT);
if (mem_clk_it != cached_settings.end()) {
cached_value = mem_clk_it->second.target_value;
if (freqs.frequency[freqs.current] == cached_value) {
if (info.max_clk != cached_value) {
status = amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_MEM,
CLK_LIMIT_MAX, cached_value);
if (status != AMDSMI_STATUS_SUCCESS) {
@@ -127,18 +127,18 @@ void RdcConfigSettingsImpl::monitorSettings() {
}
// GFX clock
status = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_GFX, &freqs);
status = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_GFX, &info);
if (status != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(
RDC_ERROR,
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq failed: " << status);
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq for gfx failed: "
<< status);
continue;
}
auto gfx_clk_it = cached_settings.find(RDC_CFG_GFX_CLOCK_LIMIT);
if (gfx_clk_it != cached_settings.end()) {
cached_value = gfx_clk_it->second.target_value;
if (freqs.frequency[freqs.current] == cached_value) {
if (info.max_clk != cached_value) {
status = amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_GFX,
CLK_LIMIT_MAX, cached_value);
if (status != AMDSMI_STATUS_SUCCESS) {
@@ -165,6 +165,8 @@ uint64_t RdcConfigSettingsImpl::microwattsToWatts(int microwatts) const {
return microwatts / 1'000'000;
}
uint64_t RdcConfigSettingsImpl::mHzToHz(uint64_t mhz) const { return mhz * 1000000ULL; }
rdc_status_t RdcConfigSettingsImpl::get_group_info(rdc_gpu_group_t group_id,
rdc_group_info_t* rdc_group_info) {
rdc_status_t status = group_settings_->rdc_group_gpu_get_info(group_id, rdc_group_info);
@@ -312,19 +314,31 @@ rdc_status_t RdcConfigSettingsImpl::rdc_config_clear(rdc_gpu_group_t group_id) {
// Reset GFX clock limit if it was set
if (group_iter->second.find(RDC_CFG_GFX_CLOCK_LIMIT) != group_iter->second.end()) {
amdsmi_frequencies_t freqs = {};
amdsmi_clk_info_t info = {};
amd_ret = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_GFX, &freqs);
if (amd_ret == AMDSMI_STATUS_SUCCESS) {
uint64_t curr = freqs.frequency[freqs.current];
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
if (curr != maxf) {
amd_ret = amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_GFX, CLK_LIMIT_MAX,
AMDSMI_DEV_PERF_LEVEL_AUTO);
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset GFX clock limit : "
<< amd_ret);
break;
}
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get GFX freq: " << amd_ret);
break;
}
amd_ret = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_GFX, &info);
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get GFX info: " << amd_ret);
break;
}
uint64_t curr = mHzToHz(info.max_clk);
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
if (curr != maxf) {
amd_ret = amdsmi_set_gpu_perf_level(processor_handle, AMDSMI_DEV_PERF_LEVEL_AUTO);
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset GFX clock limit : "
<< amd_ret);
break;
}
}
}
@@ -332,19 +346,31 @@ rdc_status_t RdcConfigSettingsImpl::rdc_config_clear(rdc_gpu_group_t group_id) {
// Reset memory clock limit if it was set
if (group_iter->second.find(RDC_CFG_MEMORY_CLOCK_LIMIT) != group_iter->second.end()) {
amdsmi_frequencies_t freqs = {};
amdsmi_clk_info_t info = {};
amd_ret = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_MEM, &freqs);
if (amd_ret == AMDSMI_STATUS_SUCCESS) {
uint64_t curr = freqs.frequency[freqs.current];
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
if (curr != maxf) {
amd_ret =
amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_MEM, CLK_LIMIT_MAX, 0);
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset memory clock limit:"
<< amd_ret);
break;
}
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get MEM freq: " << amd_ret);
break;
}
amd_ret = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_MEM, &info);
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get MEM info: " << amd_ret);
break;
}
uint64_t curr = mHzToHz(info.max_clk);
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
if (curr != maxf) {
amd_ret = amdsmi_set_gpu_perf_level(processor_handle, AMDSMI_DEV_PERF_LEVEL_AUTO);
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
RDC_LOG(RDC_ERROR,
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset memory clock limit:"
<< amd_ret);
break;
}
}
}
@@ -110,7 +110,6 @@ static const std::map<rdc_field_t, const char*> temp_field_map_k = {
{RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "CPF_CPF_TCIU_IDLE"},
{RDC_FI_PROF_CPF_CPF_TCIU_STALL, "CPF_CPF_TCIU_STALL"},
{RDC_FI_PROF_SIMD_UTILIZATION, "SIMD_UTILIZATION"},
{RDC_FI_PROF_UUID, "SQ_WAVES"}, // dummy value,
{RDC_FI_PROF_KFD_ID, "SQ_WAVES"}, // dummy value,
};
@@ -164,21 +163,6 @@ const std::vector<rdc_field_t> RdcRocpBase::get_field_ids() {
return field_ids;
}
rocprofiler_uuid_t asic_serial_to_uuid(const char* asic_serial) {
rocprofiler_uuid_t uuid = {0};
// have to cast to stoull as a workaround for amdsmi ignoring leading zeroes
uuid.value = std::stoull(asic_serial, nullptr, 16);
return uuid;
}
std::string uuid_to_string(const uint64_t uuid) {
std::ostringstream oss;
oss << "0x" << std::hex << std::setw(16) << std::setfill('0') << uuid;
return oss.str();
}
std::string uuid_to_string(const rocprofiler_uuid_t& uuid) { return uuid_to_string(uuid.value); }
rdc_status_t RdcRocpBase::map_entity_to_profiler() {
// std::map<uint32_t, uint32_t> entity_to_index_map;
// kfd_id_t is only used inside this function
@@ -419,15 +403,8 @@ rdc_status_t RdcRocpBase::rocp_lookup(rdc_gpu_field_t gpu_field, rdc_field_value
// FLOPS/clock/CU
data->dbl = divided_dbl / (256.0F / static_cast<double>(agents[agent_index].simd_per_cu));
break;
case RDC_FI_PROF_UUID: {
// do not care what RDC_FI_PROF_UUID is mapped to. read value from agents
*type = STRING;
std::string uuid_str = uuid_to_string(agents[agent_index].uuid);
strncpy_with_null(data->str, uuid_str.c_str(), uuid_str.length());
break;
}
case RDC_FI_PROF_KFD_ID: {
// do not care what RDC_FI_PROF_UUID is mapped to. read value from agents
// do not care what it is mapped to. read value from agents
*type = INTEGER;
data->l_int = agents[agent_index].gpu_id;
break;
@@ -204,7 +204,7 @@ size_t CounterSampler::get_counter_size(rocprofiler_counter_id_t counter) {
rocprofiler_counter_info_v1_t info;
rocprofiler_query_counter_info(counter, ROCPROFILER_COUNTER_INFO_VERSION_1,
static_cast<void*>(&info));
return info.instance_ids_count;
return info.dimensions_instances_count;
}
std::unordered_map<std::string, rocprofiler_counter_id_t> CounterSampler::get_supported_counters(
@@ -241,7 +241,7 @@ std::unordered_map<std::string, rocprofiler_counter_id_t> CounterSampler::get_su
return out;
}
std::vector<rocprofiler_record_dimension_info_t> CounterSampler::get_counter_dimensions(
std::vector<rocprofiler_counter_record_dimension_info_t> CounterSampler::get_counter_dimensions(
rocprofiler_counter_id_t counter) {
rocprofiler_counter_info_v1_t info;
RocprofilerCall(
@@ -250,8 +250,8 @@ std::vector<rocprofiler_record_dimension_info_t> CounterSampler::get_counter_dim
static_cast<void*>(&info));
},
"Could not query info for counter", __FILE__, __LINE__);
return std::vector<rocprofiler_record_dimension_info_t>{info.dimensions,
info.dimensions + info.dimensions_count};
return std::vector<rocprofiler_counter_record_dimension_info_t>{
*info.dimensions, *info.dimensions + info.dimensions_count};
}
int tool_init(rocprofiler_client_finalize_t, void*) {
@@ -119,11 +119,9 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case,
case RDC_DIAG_RVS_GST_TEST:
case RDC_DIAG_RVS_MEMBW_TEST:
case RDC_DIAG_RVS_H2DD2H_TEST:
case RDC_DIAG_RVS_IET_TEST:
case RDC_DIAG_RVS_GST_LONG_TEST:
case RDC_DIAG_RVS_MEMBW_LONG_TEST:
case RDC_DIAG_RVS_H2DD2H_LONG_TEST:
case RDC_DIAG_RVS_IET_LONG_TEST: {
case RDC_DIAG_RVS_H2DD2H_LONG_TEST: {
const std::string test_name = "Finished running " + test_to_name.at(test_case);
if (test_to_conf.find(test_case) == test_to_conf.end()) {
RDC_LOG(RDC_ERROR, "cannot find test " << test_to_name.at(test_case));
@@ -135,6 +133,21 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case,
rvs_status =
rvs_p->run_rvs_app(predefined_config.c_str(), predefined_config.length() + 1, callback);
break;
}
// IET tests don't work with callback, give it nullptr
case RDC_DIAG_RVS_IET_TEST:
case RDC_DIAG_RVS_IET_LONG_TEST: {
const std::string test_name = "Finished running " + test_to_name.at(test_case);
if (test_to_conf.find(test_case) == test_to_conf.end()) {
RDC_LOG(RDC_ERROR, "cannot find test " << test_to_name.at(test_case));
return RDC_ST_NOT_FOUND;
}
const std::string predefined_config = test_to_conf.at(test_case);
// +1 to copy null
strncpy_with_null(result->info, test_name.c_str(), test_name.length() + 1);
rvs_status =
rvs_p->run_rvs_app(predefined_config.c_str(), predefined_config.length() + 1, nullptr);
break;
}
case RDC_DIAG_RVS_CUSTOM:
RDC_LOG(RDC_ERROR, "custom config cannot be bundled with other tests!");