Add to/correct handling of RDC_EVNT_XGMI_*_THRPUT events

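RDC_EVNT_XGMI_[2-5]_THRPUT were missing from RDC. Additionally,
the XGMI throughput events were handled as "pseudo" events that
borrowed the raw BEATS_TX counters, but this is not necessary:
each one now maps directly to its own RSMI_EVNT_XGMI_DATA_OUT_*
event.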

Change-Id: I3478365ac0d78f60a7b63235bea484f3edb8bd16


[ROCm/rdc commit: a9d0e037b5]
이 커밋은 다음에 포함됨:
Chris Freehill
2021-01-27 15:46:57 -06:00
부모 8a4a3124f1
커밋 7cf47fb5c9
5개의 변경된 파일76개의 추가작업 그리고 55개의 파일을 삭제
+4
파일 보기
@@ -84,6 +84,10 @@ FLD_DESC_ENT(RDC_EVNT_XGMI_1_BEATS_TX, "Data sent to neighbor 1 (32 byte pkts)
FLD_DESC_ENT(RDC_EVNT_XGMI_0_THRPUT, "Tx throughput to XGMI neighbor 0 in b/s", "XGMI_0_T", true)
FLD_DESC_ENT(RDC_EVNT_XGMI_1_THRPUT, "Tx throughput to XGMI neighbor 1 in b/s", "XGMI_1_T", true)
FLD_DESC_ENT(RDC_EVNT_XGMI_2_THRPUT, "Tx throughput to XGMI neighbor 2 in b/s", "XGMI_2_T", true)
FLD_DESC_ENT(RDC_EVNT_XGMI_3_THRPUT, "Tx throughput to XGMI neighbor 3 in b/s", "XGMI_3_T", true)
FLD_DESC_ENT(RDC_EVNT_XGMI_4_THRPUT, "Tx throughput to XGMI neighbor 4 in b/s", "XGMI_4_T", true)
FLD_DESC_ENT(RDC_EVNT_XGMI_5_THRPUT, "Tx throughput to XGMI neighbor 5 in b/s", "XGMI_5_T", true)
// Asynchronous event notifications
FLD_DESC_ENT(RDC_EVNT_NOTIF_VMFAULT, "VM page fault", "VM_PAGE_FAULT", false)
+8
파일 보기
@@ -261,6 +261,14 @@ typedef enum {
//!< neighbor 0 in bytes/sec
RDC_EVNT_XGMI_1_THRPUT, //!< Transmit throughput to XGMI
//!< neighbor 1 in bytes/sec
RDC_EVNT_XGMI_2_THRPUT, //!< Transmit throughput to XGMI
//!< neighbor 2 in bytes/sec
RDC_EVNT_XGMI_3_THRPUT, //!< Transmit throughput to XGMI
//!< neighbor 3 in bytes/sec
RDC_EVNT_XGMI_4_THRPUT, //!< Transmit throughput to XGMI
//!< neighbor 4 in bytes/sec
RDC_EVNT_XGMI_5_THRPUT, //!< Transmit throughput to XGMI
//!< neighbor 5 in bytes/sec
RDC_EVNT_NOTIF_VMFAULT = 2000, //!< VM page fault
RDC_EVNT_NOTIF_FIRST = RDC_EVNT_NOTIF_VMFAULT,
+60 -53
파일 보기
@@ -47,15 +47,15 @@ static const std::unordered_map<rdc_field_t, rsmi_event_type_t>
{RDC_EVNT_XGMI_1_REQ_TX, RSMI_EVNT_XGMI_1_REQUEST_TX},
{RDC_EVNT_XGMI_1_RESP_TX, RSMI_EVNT_XGMI_1_RESPONSE_TX},
{RDC_EVNT_XGMI_1_BEATS_TX, RSMI_EVNT_XGMI_1_BEATS_TX},
};
// This maps pseudo-events to the raw events that they use.
static const std::unordered_map<rdc_field_t, rdc_field_t> pseudo_evt_map = {
{RDC_EVNT_XGMI_0_THRPUT, RDC_EVNT_XGMI_0_BEATS_TX},
{RDC_EVNT_XGMI_1_THRPUT, RDC_EVNT_XGMI_1_BEATS_TX},
{RDC_EVNT_XGMI_0_THRPUT, RSMI_EVNT_XGMI_DATA_OUT_0},
{RDC_EVNT_XGMI_1_THRPUT, RSMI_EVNT_XGMI_DATA_OUT_1},
{RDC_EVNT_XGMI_2_THRPUT, RSMI_EVNT_XGMI_DATA_OUT_2},
{RDC_EVNT_XGMI_3_THRPUT, RSMI_EVNT_XGMI_DATA_OUT_3},
{RDC_EVNT_XGMI_4_THRPUT, RSMI_EVNT_XGMI_DATA_OUT_4},
{RDC_EVNT_XGMI_5_THRPUT, RSMI_EVNT_XGMI_DATA_OUT_5},
};
RdcMetricFetcherImpl::RdcMetricFetcherImpl() {
task_started_ = true;
@@ -442,6 +442,10 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index,
break;
case RDC_EVNT_XGMI_0_THRPUT:
case RDC_EVNT_XGMI_1_THRPUT:
case RDC_EVNT_XGMI_2_THRPUT:
case RDC_EVNT_XGMI_3_THRPUT:
case RDC_EVNT_XGMI_4_THRPUT:
case RDC_EVNT_XGMI_5_THRPUT:
read_rsmi_counter();
if (value->status == RDC_ST_OK) {
if (rsmi_data->counter_val.time_running > 0) {
@@ -492,18 +496,6 @@ RdcMetricFetcherImpl::get_rsmi_data(RdcFieldKey key) {
if (r_info != rsmi_data_.end()) {
return r_info->second;
}
auto pseudo_key_field_id = pseudo_evt_map.find(key.second);
if (pseudo_key_field_id != pseudo_evt_map.end()) {
RdcFieldKey new_key;
new_key.first = key.first;
new_key.second = pseudo_key_field_id->second;
r_info = rsmi_data_.find(new_key);
if (r_info != rsmi_data_.end()) {
return r_info->second;
}
}
return nullptr;
}
@@ -530,17 +522,13 @@ static rdc_status_t init_rsmi_counter(RdcFieldKey fk,
return RDC_ST_INSUFF_RESOURCES;
}
auto raw_evt = pseudo_evt_map.find(f);
if (raw_evt != pseudo_evt_map.end()) {
f = raw_evt->second;
}
rsmi_event_type_t evt = rdc_evnt_2_rsmi_field.at(f);
ret = rsmi_dev_counter_create(dv_ind, evt, handle);
if (ret != RSMI_STATUS_SUCCESS) {
return Rsmi2RdcError(ret);
}
ret = rsmi_counter_control(*handle, RSMI_CNTR_CMD_START, NULL);
ret = rsmi_counter_control(*handle, RSMI_CNTR_CMD_START, nullptr);
return Rsmi2RdcError(ret);
}
@@ -557,7 +545,11 @@ rdc_status_t RdcMetricFetcherImpl::delete_rsmi_handle(RdcFieldKey fk) {
case RDC_EVNT_XGMI_1_RESP_TX:
case RDC_EVNT_XGMI_1_BEATS_TX:
case RDC_EVNT_XGMI_0_THRPUT:
case RDC_EVNT_XGMI_1_THRPUT: {
case RDC_EVNT_XGMI_1_THRPUT:
case RDC_EVNT_XGMI_2_THRPUT:
case RDC_EVNT_XGMI_3_THRPUT:
case RDC_EVNT_XGMI_4_THRPUT:
case RDC_EVNT_XGMI_5_THRPUT: {
rsmi_event_handle_t h;
if (rsmi_data_.find(fk) == rsmi_data_.end()) {
return RDC_ST_NOT_SUPPORTED;
@@ -585,7 +577,33 @@ rdc_status_t RdcMetricFetcherImpl::delete_rsmi_handle(RdcFieldKey fk) {
}
rdc_status_t RdcMetricFetcherImpl::acquire_rsmi_handle(RdcFieldKey fk) {
rdc_status_t result;
rdc_status_t ret;
auto get_evnt_handle = [&](rsmi_event_group_t grp) {
rsmi_event_handle_t handle;
rdc_status_t result;
if (get_rsmi_data(fk) != nullptr) {
// This event has already been initialized.
return RDC_ST_ALREADY_EXIST;
}
result = init_rsmi_counter(fk, grp, &handle);
if (result != RDC_ST_OK) {
return result;
}
auto fsh = std::shared_ptr<FieldRSMIData>(new FieldRSMIData);
if (fsh == nullptr) {
return RDC_ST_INSUFF_RESOURCES;
}
fsh->evt_handle = handle;
rsmi_data_[fk] = fsh;
return RDC_ST_OK;
};
switch (fk.second) {
case RDC_EVNT_XGMI_0_NOP_TX:
@@ -596,41 +614,30 @@ rdc_status_t RdcMetricFetcherImpl::acquire_rsmi_handle(RdcFieldKey fk) {
case RDC_EVNT_XGMI_1_REQ_TX:
case RDC_EVNT_XGMI_1_RESP_TX:
case RDC_EVNT_XGMI_1_BEATS_TX:
ret = get_evnt_handle(RSMI_EVNT_GRP_XGMI);
break;
case RDC_EVNT_XGMI_0_THRPUT:
case RDC_EVNT_XGMI_1_THRPUT: {
rsmi_event_handle_t handle;
if (get_rsmi_data(fk) != nullptr) {
// This event has already been initialized.
return RDC_ST_ALREADY_EXIST;
}
result = init_rsmi_counter(fk, RSMI_EVNT_GRP_XGMI, &handle);
if (result != RDC_ST_OK) {
return result;
}
auto fsh = std::shared_ptr<FieldRSMIData>(new FieldRSMIData);
if (fsh == nullptr) {
return RDC_ST_INSUFF_RESOURCES;
}
fsh->evt_handle = handle;
auto pseudo_key = pseudo_evt_map.find(fk.second);
if (pseudo_key != pseudo_evt_map.end()) {
fk.second = pseudo_key->second;
}
rsmi_data_[fk] = fsh;
}
case RDC_EVNT_XGMI_1_THRPUT:
case RDC_EVNT_XGMI_2_THRPUT:
case RDC_EVNT_XGMI_3_THRPUT:
case RDC_EVNT_XGMI_4_THRPUT:
case RDC_EVNT_XGMI_5_THRPUT:
ret = get_evnt_handle(RSMI_EVNT_GRP_XGMI_DATA_OUT);
break;
default:
break;
}
return RDC_ST_OK;
if (ret == RDC_ST_INSUFF_RESOURCES) {
amd::rdc::fld_id2name_map_t &field_id_to_descript =
amd::rdc::get_field_id_description_from_id();
RDC_LOG(RDC_ERROR, "No event counters are available for " <<
field_id_to_descript.at(fk.second).enum_name << " event.");
}
return ret;
}
} // namespace rdc
+1 -1
파일 보기
@@ -25,7 +25,7 @@ THE SOFTWARE.
#include <unordered_map>
#include <vector>
#include <mutex>
#include <mutex> // NOLINT
#include "rdc/rdc.h"
#include "rdc_lib/impl/RdcTelemetryModule.h"
+3 -1
파일 보기
@@ -145,7 +145,9 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(
RDC_EVNT_XGMI_0_RESP_TX, RDC_EVNT_XGMI_0_BEATS_TX,
RDC_EVNT_XGMI_1_NOP_TX, RDC_EVNT_XGMI_1_REQ_TX,
RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX,
RDC_EVNT_XGMI_0_THRPUT, RDC_EVNT_XGMI_1_THRPUT
RDC_EVNT_XGMI_0_THRPUT, RDC_EVNT_XGMI_1_THRPUT,
RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT,
RDC_EVNT_XGMI_4_THRPUT, RDC_EVNT_XGMI_5_THRPUT,
};
std::copy(fields.begin(), fields.end(), field_ids);
*field_count = fields.size();