Merge commit '8f3a2326136caefa935876155eddb61177ad362c' into develop
This commit is contained in:
@@ -51,6 +51,7 @@ if(${NAME}_FOUND AND NOT TARGET ${NAME}::${NAME})
|
||||
find_package(hipblaslt REQUIRED)
|
||||
find_package(hsakmt REQUIRED)
|
||||
find_package(hip REQUIRED)
|
||||
find_package(hiprand REQUIRED)
|
||||
find_package(hsa-runtime64 REQUIRED)
|
||||
find_package(amd_smi REQUIRED)
|
||||
target_link_libraries(
|
||||
@@ -62,6 +63,7 @@ if(${NAME}_FOUND AND NOT TARGET ${NAME}::${NAME})
|
||||
roc::hipblaslt
|
||||
hsakmt::hsakmt
|
||||
hip::amdhip64
|
||||
hip::hiprand
|
||||
hsa-runtime64::hsa-runtime64
|
||||
amd_smi
|
||||
)
|
||||
|
||||
@@ -195,7 +195,6 @@ FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "", "CPF_CPF_TCIU_I
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_STALL, "", "CPF_CPF_TCIU_STALL", false)
|
||||
// Misc
|
||||
FLD_DESC_ENT(RDC_FI_PROF_SIMD_UTILIZATION, "Fraction of time the SIMDs are being utilized", "SIMD_UTILIZATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_UUID, "UUID from rocprofiler", "PROF_UUID", true)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_KFD_ID, "GPU_ID from rocprofiler, same as KFD_ID", "PROF_KFD_ID", true)
|
||||
|
||||
// Events
|
||||
|
||||
@@ -345,7 +345,6 @@ typedef enum {
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_IDLE,
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_STALL,
|
||||
RDC_FI_PROF_SIMD_UTILIZATION,
|
||||
RDC_FI_PROF_UUID,
|
||||
RDC_FI_PROF_KFD_ID,
|
||||
|
||||
/**
|
||||
|
||||
@@ -64,6 +64,7 @@ class RdcConfigSettingsImpl : public RdcConfigSettings {
|
||||
void monitorSettings();
|
||||
uint64_t wattsToMicrowatts(uint64_t watts) const;
|
||||
uint64_t microwattsToWatts(int microwatts) const;
|
||||
uint64_t mHzToHz(uint64_t mhz) const;
|
||||
rdc_status_t get_group_info(rdc_gpu_group_t group_id, rdc_group_info_t* rdc_group_info);
|
||||
};
|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ class CounterSampler {
|
||||
size_t get_counter_size(rocprofiler_counter_id_t counter);
|
||||
|
||||
// Get the dimensions of a counter
|
||||
std::vector<rocprofiler_record_dimension_info_t> get_counter_dimensions(
|
||||
std::vector<rocprofiler_counter_record_dimension_info_t> get_counter_dimensions(
|
||||
rocprofiler_counter_id_t counter);
|
||||
|
||||
static std::vector<std::shared_ptr<CounterSampler>> samplers_;
|
||||
|
||||
@@ -44,7 +44,7 @@ void RdcConfigSettingsImpl::monitorSettings() {
|
||||
rdc_status_t rdc_status;
|
||||
rdc_group_info_t rdc_group_info = {};
|
||||
amdsmi_power_cap_info_t cap_info = {};
|
||||
amdsmi_frequencies_t freqs = {};
|
||||
amdsmi_clk_info_t info = {};
|
||||
uint64_t cached_value;
|
||||
|
||||
while (true) {
|
||||
@@ -102,18 +102,18 @@ void RdcConfigSettingsImpl::monitorSettings() {
|
||||
}
|
||||
|
||||
// Mem clock
|
||||
status = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_MEM, &freqs);
|
||||
status = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_MEM, &info);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(
|
||||
RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq failed: " << status);
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq for mem failed: "
|
||||
<< status);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto mem_clk_it = cached_settings.find(RDC_CFG_MEMORY_CLOCK_LIMIT);
|
||||
if (mem_clk_it != cached_settings.end()) {
|
||||
cached_value = mem_clk_it->second.target_value;
|
||||
if (freqs.frequency[freqs.current] == cached_value) {
|
||||
if (info.max_clk != cached_value) {
|
||||
status = amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_MEM,
|
||||
CLK_LIMIT_MAX, cached_value);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
@@ -127,18 +127,18 @@ void RdcConfigSettingsImpl::monitorSettings() {
|
||||
}
|
||||
|
||||
// GFX clock
|
||||
status = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_GFX, &freqs);
|
||||
status = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_GFX, &info);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(
|
||||
RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq failed: " << status);
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::monitorSettings(); amdsmi_get_clk_freq for gfx failed: "
|
||||
<< status);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto gfx_clk_it = cached_settings.find(RDC_CFG_GFX_CLOCK_LIMIT);
|
||||
if (gfx_clk_it != cached_settings.end()) {
|
||||
cached_value = gfx_clk_it->second.target_value;
|
||||
if (freqs.frequency[freqs.current] == cached_value) {
|
||||
if (info.max_clk != cached_value) {
|
||||
status = amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_GFX,
|
||||
CLK_LIMIT_MAX, cached_value);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
@@ -165,6 +165,8 @@ uint64_t RdcConfigSettingsImpl::microwattsToWatts(int microwatts) const {
|
||||
return microwatts / 1'000'000;
|
||||
}
|
||||
|
||||
uint64_t RdcConfigSettingsImpl::mHzToHz(uint64_t mhz) const { return mhz * 1000000ULL; }
|
||||
|
||||
rdc_status_t RdcConfigSettingsImpl::get_group_info(rdc_gpu_group_t group_id,
|
||||
rdc_group_info_t* rdc_group_info) {
|
||||
rdc_status_t status = group_settings_->rdc_group_gpu_get_info(group_id, rdc_group_info);
|
||||
@@ -312,19 +314,31 @@ rdc_status_t RdcConfigSettingsImpl::rdc_config_clear(rdc_gpu_group_t group_id) {
|
||||
// Reset GFX clock limit if it was set
|
||||
if (group_iter->second.find(RDC_CFG_GFX_CLOCK_LIMIT) != group_iter->second.end()) {
|
||||
amdsmi_frequencies_t freqs = {};
|
||||
amdsmi_clk_info_t info = {};
|
||||
|
||||
amd_ret = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_GFX, &freqs);
|
||||
if (amd_ret == AMDSMI_STATUS_SUCCESS) {
|
||||
uint64_t curr = freqs.frequency[freqs.current];
|
||||
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
|
||||
if (curr != maxf) {
|
||||
amd_ret = amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_GFX, CLK_LIMIT_MAX,
|
||||
AMDSMI_DEV_PERF_LEVEL_AUTO);
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset GFX clock limit : "
|
||||
<< amd_ret);
|
||||
break;
|
||||
}
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get GFX freq: " << amd_ret);
|
||||
break;
|
||||
}
|
||||
|
||||
amd_ret = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_GFX, &info);
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get GFX info: " << amd_ret);
|
||||
break;
|
||||
}
|
||||
|
||||
uint64_t curr = mHzToHz(info.max_clk);
|
||||
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
|
||||
if (curr != maxf) {
|
||||
amd_ret = amdsmi_set_gpu_perf_level(processor_handle, AMDSMI_DEV_PERF_LEVEL_AUTO);
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset GFX clock limit : "
|
||||
<< amd_ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -332,19 +346,31 @@ rdc_status_t RdcConfigSettingsImpl::rdc_config_clear(rdc_gpu_group_t group_id) {
|
||||
// Reset memory clock limit if it was set
|
||||
if (group_iter->second.find(RDC_CFG_MEMORY_CLOCK_LIMIT) != group_iter->second.end()) {
|
||||
amdsmi_frequencies_t freqs = {};
|
||||
amdsmi_clk_info_t info = {};
|
||||
|
||||
amd_ret = amdsmi_get_clk_freq(processor_handle, AMDSMI_CLK_TYPE_MEM, &freqs);
|
||||
if (amd_ret == AMDSMI_STATUS_SUCCESS) {
|
||||
uint64_t curr = freqs.frequency[freqs.current];
|
||||
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
|
||||
if (curr != maxf) {
|
||||
amd_ret =
|
||||
amdsmi_set_gpu_clk_limit(processor_handle, AMDSMI_CLK_TYPE_MEM, CLK_LIMIT_MAX, 0);
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset memory clock limit:"
|
||||
<< amd_ret);
|
||||
break;
|
||||
}
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get MEM freq: " << amd_ret);
|
||||
break;
|
||||
}
|
||||
|
||||
amd_ret = amdsmi_get_clock_info(processor_handle, AMDSMI_CLK_TYPE_MEM, &info);
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to get MEM info: " << amd_ret);
|
||||
break;
|
||||
}
|
||||
|
||||
uint64_t curr = mHzToHz(info.max_clk);
|
||||
uint64_t maxf = freqs.frequency[freqs.num_supported - 1];
|
||||
if (curr != maxf) {
|
||||
amd_ret = amdsmi_set_gpu_perf_level(processor_handle, AMDSMI_DEV_PERF_LEVEL_AUTO);
|
||||
if (amd_ret != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"RdcConfigSettingsImpl::rdc_config_clear: Failed to reset memory clock limit:"
|
||||
<< amd_ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,7 +110,6 @@ static const std::map<rdc_field_t, const char*> temp_field_map_k = {
|
||||
{RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "CPF_CPF_TCIU_IDLE"},
|
||||
{RDC_FI_PROF_CPF_CPF_TCIU_STALL, "CPF_CPF_TCIU_STALL"},
|
||||
{RDC_FI_PROF_SIMD_UTILIZATION, "SIMD_UTILIZATION"},
|
||||
{RDC_FI_PROF_UUID, "SQ_WAVES"}, // dummy value,
|
||||
{RDC_FI_PROF_KFD_ID, "SQ_WAVES"}, // dummy value,
|
||||
};
|
||||
|
||||
@@ -164,21 +163,6 @@ const std::vector<rdc_field_t> RdcRocpBase::get_field_ids() {
|
||||
return field_ids;
|
||||
}
|
||||
|
||||
rocprofiler_uuid_t asic_serial_to_uuid(const char* asic_serial) {
|
||||
rocprofiler_uuid_t uuid = {0};
|
||||
// have to cast to stoull as a workaround for amdsmi ignoring leading zeroes
|
||||
uuid.value = std::stoull(asic_serial, nullptr, 16);
|
||||
return uuid;
|
||||
}
|
||||
|
||||
std::string uuid_to_string(const uint64_t uuid) {
|
||||
std::ostringstream oss;
|
||||
oss << "0x" << std::hex << std::setw(16) << std::setfill('0') << uuid;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string uuid_to_string(const rocprofiler_uuid_t& uuid) { return uuid_to_string(uuid.value); }
|
||||
|
||||
rdc_status_t RdcRocpBase::map_entity_to_profiler() {
|
||||
// std::map<uint32_t, uint32_t> entity_to_index_map;
|
||||
// kfd_id_t is only used inside this function
|
||||
@@ -419,15 +403,8 @@ rdc_status_t RdcRocpBase::rocp_lookup(rdc_gpu_field_t gpu_field, rdc_field_value
|
||||
// FLOPS/clock/CU
|
||||
data->dbl = divided_dbl / (256.0F / static_cast<double>(agents[agent_index].simd_per_cu));
|
||||
break;
|
||||
case RDC_FI_PROF_UUID: {
|
||||
// do not care what RDC_FI_PROF_UUID is mapped to. read value from agents
|
||||
*type = STRING;
|
||||
std::string uuid_str = uuid_to_string(agents[agent_index].uuid);
|
||||
strncpy_with_null(data->str, uuid_str.c_str(), uuid_str.length());
|
||||
break;
|
||||
}
|
||||
case RDC_FI_PROF_KFD_ID: {
|
||||
// do not care what RDC_FI_PROF_UUID is mapped to. read value from agents
|
||||
// do not care what it is mapped to. read value from agents
|
||||
*type = INTEGER;
|
||||
data->l_int = agents[agent_index].gpu_id;
|
||||
break;
|
||||
|
||||
@@ -204,7 +204,7 @@ size_t CounterSampler::get_counter_size(rocprofiler_counter_id_t counter) {
|
||||
rocprofiler_counter_info_v1_t info;
|
||||
rocprofiler_query_counter_info(counter, ROCPROFILER_COUNTER_INFO_VERSION_1,
|
||||
static_cast<void*>(&info));
|
||||
return info.instance_ids_count;
|
||||
return info.dimensions_instances_count;
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, rocprofiler_counter_id_t> CounterSampler::get_supported_counters(
|
||||
@@ -241,7 +241,7 @@ std::unordered_map<std::string, rocprofiler_counter_id_t> CounterSampler::get_su
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<rocprofiler_record_dimension_info_t> CounterSampler::get_counter_dimensions(
|
||||
std::vector<rocprofiler_counter_record_dimension_info_t> CounterSampler::get_counter_dimensions(
|
||||
rocprofiler_counter_id_t counter) {
|
||||
rocprofiler_counter_info_v1_t info;
|
||||
RocprofilerCall(
|
||||
@@ -250,8 +250,8 @@ std::vector<rocprofiler_record_dimension_info_t> CounterSampler::get_counter_dim
|
||||
static_cast<void*>(&info));
|
||||
},
|
||||
"Could not query info for counter", __FILE__, __LINE__);
|
||||
return std::vector<rocprofiler_record_dimension_info_t>{info.dimensions,
|
||||
info.dimensions + info.dimensions_count};
|
||||
return std::vector<rocprofiler_counter_record_dimension_info_t>{
|
||||
*info.dimensions, *info.dimensions + info.dimensions_count};
|
||||
}
|
||||
|
||||
int tool_init(rocprofiler_client_finalize_t, void*) {
|
||||
|
||||
@@ -119,11 +119,9 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case,
|
||||
case RDC_DIAG_RVS_GST_TEST:
|
||||
case RDC_DIAG_RVS_MEMBW_TEST:
|
||||
case RDC_DIAG_RVS_H2DD2H_TEST:
|
||||
case RDC_DIAG_RVS_IET_TEST:
|
||||
case RDC_DIAG_RVS_GST_LONG_TEST:
|
||||
case RDC_DIAG_RVS_MEMBW_LONG_TEST:
|
||||
case RDC_DIAG_RVS_H2DD2H_LONG_TEST:
|
||||
case RDC_DIAG_RVS_IET_LONG_TEST: {
|
||||
case RDC_DIAG_RVS_H2DD2H_LONG_TEST: {
|
||||
const std::string test_name = "Finished running " + test_to_name.at(test_case);
|
||||
if (test_to_conf.find(test_case) == test_to_conf.end()) {
|
||||
RDC_LOG(RDC_ERROR, "cannot find test " << test_to_name.at(test_case));
|
||||
@@ -135,6 +133,21 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case,
|
||||
rvs_status =
|
||||
rvs_p->run_rvs_app(predefined_config.c_str(), predefined_config.length() + 1, callback);
|
||||
break;
|
||||
}
|
||||
// IET tests don't work with callback, give it nullptr
|
||||
case RDC_DIAG_RVS_IET_TEST:
|
||||
case RDC_DIAG_RVS_IET_LONG_TEST: {
|
||||
const std::string test_name = "Finished running " + test_to_name.at(test_case);
|
||||
if (test_to_conf.find(test_case) == test_to_conf.end()) {
|
||||
RDC_LOG(RDC_ERROR, "cannot find test " << test_to_name.at(test_case));
|
||||
return RDC_ST_NOT_FOUND;
|
||||
}
|
||||
const std::string predefined_config = test_to_conf.at(test_case);
|
||||
// +1 to copy null
|
||||
strncpy_with_null(result->info, test_name.c_str(), test_name.length() + 1);
|
||||
rvs_status =
|
||||
rvs_p->run_rvs_app(predefined_config.c_str(), predefined_config.length() + 1, nullptr);
|
||||
break;
|
||||
}
|
||||
case RDC_DIAG_RVS_CUSTOM:
|
||||
RDC_LOG(RDC_ERROR, "custom config cannot be bundled with other tests!");
|
||||
|
||||
Reference in New Issue
Block a user