diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index d20c480a35..393696d251 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -205,7 +205,7 @@ typedef uintptr_t rsmi_event_handle_t; */ typedef enum { RSMI_EVNT_GRP_XGMI = 0, //!< Data Fabric (XGMI) related events - RSMI_EVNT_GRP_XGMI_DATA_OUT = 10, //!< XGMI Outbound data + RSMI_EVNT_GRP_INVALID = 0xFFFFFFFF } rsmi_event_group_t; @@ -248,34 +248,9 @@ typedef enum { //!< neighbor 1; Each beat //!< represents 32 bytes - RSMI_EVNT_XGMI_LAST = RSMI_EVNT_XGMI_1_BEATS_TX, // 5 + RSMI_EVNT_XGMI_LAST = RSMI_EVNT_XGMI_1_BEATS_TX, - RSMI_EVNT_XGMI_DATA_OUT_FIRST = RSMI_EVNT_GRP_XGMI_DATA_OUT, // 10 - - /* - * @brief Events in the RSMI_EVNT_GRP_XGMI_DATA_OUT group measure - * the number of beats sent on an XGMI link. Each beat represents - * 32 bytes. RSMI_EVNT_XGMI_DATA_OUT_n represents the number of - * outbound beats (each representing 32 bytes) on link n.

- * - * XGMI throughput can be calculated by multiplying a event - * such as ::RSMI_EVNT_XGMI_DATA_OUT_n by 32 and dividing by - * the time for which event collection occurred, - * ::rsmi_counter_value_t.time_running (which is in nanoseconds). To get - * bytes per second, multiply this value by 10^9.
- *
- * Throughput = BEATS/time_running * 10^9 (bytes/second)
- */ - // ie, Throughput = BEATS/time_running 10^9 bytes/sec - RSMI_EVNT_XGMI_DATA_OUT_0 = RSMI_EVNT_XGMI_DATA_OUT_FIRST, - RSMI_EVNT_XGMI_DATA_OUT_1, //!< Outbound beats to neighbor 1 - RSMI_EVNT_XGMI_DATA_OUT_2, //!< Outbound beats to neighbor 2 - RSMI_EVNT_XGMI_DATA_OUT_3, //!< Outbound beats to neighbor 3 - RSMI_EVNT_XGMI_DATA_OUT_4, //!< Outbound beats to neighbor 4 - RSMI_EVNT_XGMI_DATA_OUT_5, //!< Outbound beats to neighbor 5 - RSMI_EVNT_XGMI_DATA_OUT_LAST = RSMI_EVNT_XGMI_DATA_OUT_5, - - RSMI_EVNT_LAST = RSMI_EVNT_XGMI_LAST, + RSMI_EVNT_LAST = RSMI_EVNT_XGMI_LAST } rsmi_event_type_t; /** @@ -284,7 +259,8 @@ typedef enum { typedef enum { RSMI_CNTR_CMD_START = 0, //!< Start the counter RSMI_CNTR_CMD_STOP, //!< Stop the counter; note that this should not - //!< be used before reading. + //!< be used before reading. It is for temporarily + //!< disabling the counter. } rsmi_counter_command_t; /** @@ -808,8 +784,8 @@ typedef struct { /* Utilization */ uint16_t average_gfx_activity; - uint16_t average_umc_activity; // memory controller - uint16_t average_mm_activity; // UVD or VCN + uint16_t average_umc_activity; // memory controller + uint16_t average_mm_activity; // UVD or VCN /* Power/Energy */ uint16_t average_socket_power; @@ -841,7 +817,7 @@ typedef struct { /* Link width/speed */ uint8_t pcie_link_width; - uint8_t pcie_link_speed; // in 0.1 GT/s + uint8_t pcie_link_speed; // in 0.1 GT/s }rsmi_gpu_metrics_t; /// \cond Ignore in docs. 
typedef rsmi_gpu_metrics_t rsmi_gpu_metrics; diff --git a/include/rocm_smi/rocm_smi_counters.h b/include/rocm_smi/rocm_smi_counters.h index dc7c740ecd..f019a57be4 100755 --- a/include/rocm_smi/rocm_smi_counters.h +++ b/include/rocm_smi/rocm_smi_counters.h @@ -73,7 +73,7 @@ GetSupportedEventGroups(uint32_t dev_ind, dev_evt_grp_set_t*supported_grps); struct evnt_info_t { uint8_t start_bit; uint8_t field_size; - uint64_t value; + uint32_t value; }; struct perf_read_format_t { diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index b7d5ddd8ce..26566dfa44 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -828,6 +828,7 @@ rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_level) { static rsmi_status_t set_dev_range(uint32_t dv_ind, std::string range) { + GET_DEV_FROM_INDX int ret = dev->writeDevInfo(amd::smi::kDevPowerODVoltage, range); @@ -1060,9 +1061,9 @@ rsmi_status_t rsmi_dev_od_clk_info_set(uint32_t dv_ind, rsmi_freq_ind_t level, } // For clock frequency setting, enter a new value by writing a string that - // contains "s/m index clock" to the file. The index should be 0 if to set - // minimum clock. And 1 if to set maximum clock. E.g., "s 0 500" will update - // minimum sclk to be 500 MHz. "m 1 800" will update maximum mclk to 800Mhz. + // contains “s/m index clock” to the file. The index should be 0 if to set + // minimum clock. And 1 if to set maximum clock. E.g., “s 0 500” will update + // minimum sclk to be 500 MHz. “m 1 800” will update maximum mclk to 800Mhz. 
switch (clkType) { case RSMI_CLK_TYPE_SYS: @@ -1085,7 +1086,7 @@ rsmi_status_t rsmi_dev_od_clk_info_set(uint32_t dv_ind, rsmi_freq_ind_t level, ret = set_dev_range(dv_ind, sysvalue); if (ret != RSMI_STATUS_SUCCESS) { return ret; - } + } ret = set_dev_range(dv_ind, "c"); if (ret != RSMI_STATUS_SUCCESS) { return ret; @@ -1108,9 +1109,9 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, } // For sclk voltage curve, enter the new values by writing a string that - // contains "vc point clock voltage" to the file. The points are indexed - // by 0, 1 and 2. E.g., "vc 0 300 600" will update point1 with clock set - // as 300Mhz and voltage as 600mV. "vc 2 1000 1000" will update point3 + // contains “vc point clock voltage” to the file. The points are indexed + // by 0, 1 and 2. E.g., “vc 0 300 600” will update point1 with clock set + // as 300Mhz and voltage as 600mV. “vc 2 1000 1000” will update point3 // with clock set as 1000Mhz and voltage 1000mV. std::string sysvalue = "vc"; @@ -2854,8 +2855,6 @@ rsmi_counter_available_counters_get(uint32_t dv_ind, switch (grp) { case RSMI_EVNT_GRP_XGMI: - case RSMI_EVNT_GRP_XGMI_DATA_OUT: - ret = get_dev_value_int(amd::smi::kDevDFCountersAvailable, dv_ind, &val); assert(val < UINT32_MAX); *available = static_cast(val); diff --git a/src/rocm_smi_counters.cc b/src/rocm_smi_counters.cc index 03b869ea32..45ea210460 100755 --- a/src/rocm_smi_counters.cc +++ b/src/rocm_smi_counters.cc @@ -72,7 +72,6 @@ static const char *kPathDeviceEventRoot = "/sys/bus/event_source/devices"; // Event group names static const char *kEvGrpDataFabricFName = "amdgpu_df_#"; -static const char *kEvGrpAmdGpuFName = "amdgpu_#"; // Data Fabric event file names static const char *kDFEvtCake0FtiReqAllocFName = "cake0_ftiinstat_reqalloc"; @@ -84,14 +83,6 @@ static const char *kDFEvtCake1FtiRspAllocFName = "cake1_ftiinstat_rspalloc"; static const char *kDFEvtCake1PcsOutTxDataFName = "cake1_pcsout_txdata"; static const char 
*kDFEvtCake1PcsOutTxMetaFName = "cake1_pcsout_txmeta"; -// XGMI Data Outbound event file names -static const char *kXGMIDOutBound0FName = "xgmi_link0_data_outbound"; -static const char *kXGMIDOutBound1FName = "xgmi_link1_data_outbound"; -static const char *kXGMIDOutBound2FName = "xgmi_link2_data_outbound"; -static const char *kXGMIDOutBound3FName = "xgmi_link3_data_outbound"; -static const char *kXGMIDOutBound4FName = "xgmi_link4_data_outbound"; -static const char *kXGMIDOutBound5FName = "xgmi_link5_data_outbound"; - static const std::map kEventFNameMap = { {RSMI_EVNT_XGMI_0_NOP_TX, kDFEvtCake0PcsOutTxMetaFName}, @@ -102,18 +93,10 @@ static const std::map kEventFNameMap = { {RSMI_EVNT_XGMI_1_REQUEST_TX, kDFEvtCake1FtiReqAllocFName}, {RSMI_EVNT_XGMI_1_RESPONSE_TX, kDFEvtCake1FtiRspAllocFName}, {RSMI_EVNT_XGMI_1_BEATS_TX, kDFEvtCake1PcsOutTxDataFName}, - - {RSMI_EVNT_XGMI_DATA_OUT_0, kXGMIDOutBound0FName}, - {RSMI_EVNT_XGMI_DATA_OUT_1, kXGMIDOutBound1FName}, - {RSMI_EVNT_XGMI_DATA_OUT_2, kXGMIDOutBound2FName}, - {RSMI_EVNT_XGMI_DATA_OUT_3, kXGMIDOutBound3FName}, - {RSMI_EVNT_XGMI_DATA_OUT_4, kXGMIDOutBound4FName}, - {RSMI_EVNT_XGMI_DATA_OUT_5, kXGMIDOutBound5FName}, }; static const std::map kEvtGrpFNameMap = { - {RSMI_EVNT_GRP_XGMI, kEvGrpDataFabricFName}, - {RSMI_EVNT_GRP_XGMI_DATA_OUT, kEvGrpAmdGpuFName}, + {RSMI_EVNT_GRP_XGMI, kEvGrpDataFabricFName}, {RSMI_EVNT_GRP_INVALID, "bogus"}, }; @@ -124,7 +107,6 @@ static rsmi_event_group_t EvtGrpFromEvtID(rsmi_event_type_t evnt) { return EVGRP_ENUM; \ } EVNT_GRP_RANGE_CHK(XGMI, RSMI_EVNT_GRP_XGMI); - EVNT_GRP_RANGE_CHK(XGMI_DATA_OUT, RSMI_EVNT_GRP_XGMI_DATA_OUT); return RSMI_EVNT_GRP_INVALID; } diff --git a/src/rocm_smi_device.cc b/src/rocm_smi_device.cc index 3b00390ce3..b5449bf8f0 100755 --- a/src/rocm_smi_device.cc +++ b/src/rocm_smi_device.cc @@ -646,7 +646,7 @@ int Device::readDevInfoBinary(DevInfoTypes type, } // copies all data into buffer retVec->insert(retVec->begin(), - std::istreambuf_iterator(fs), {}); + 
std::istreambuf_iterator(fs),{}); return 0; } diff --git a/tests/rocm_smi_test/functional/perf_cntr_read_write.cc b/tests/rocm_smi_test/functional/perf_cntr_read_write.cc index 4c84ab86ad..c937aec6ee 100755 --- a/tests/rocm_smi_test/functional/perf_cntr_read_write.cc +++ b/tests/rocm_smi_test/functional/perf_cntr_read_write.cc @@ -51,7 +51,6 @@ #include #include #include -#include #include "gtest/gtest.h" #include "rocm_smi/rocm_smi.h" @@ -71,8 +70,7 @@ PerfCntrEvtGrp::~PerfCntrEvtGrp() {} PerfCntrEvtGrp(RSMI_EVNT_GRP_##SHRT, RSMI_EVNT_##SHRT##_FIRST, \ RSMI_EVNT_##SHRT##_LAST, NAME) static const std::vector s_event_groups = { - PC_EVT_GRP(XGMI, "XGMI"), - PC_EVT_GRP(XGMI_DATA_OUT, "XGMI_DATA_OUT") + PC_EVT_GRP(XGMI, "XGMI") }; TestPerfCntrReadWrite::TestPerfCntrReadWrite() : TestBase() { @@ -141,8 +139,8 @@ void TestPerfCntrReadWrite::CountEvents(uint32_t dv_ind, IF_VERB(STANDARD) { std::cout << "\t\t\tValue: " << val->value << std::endl; - std::cout << "\t\t\tTime Enabled (nS): " << val->time_enabled << std::endl; - std::cout << "\t\t\tTime Running (nS): " << val->time_running << std::endl; + std::cout << "\t\t\tTime Enabled: " << val->time_enabled << std::endl; + std::cout << "\t\t\tTime Running: " << val->time_running << std::endl; std::cout << "\t\t\tEvents/Second Running: " << val->value/static_cast(val->time_running) << std::endl; } @@ -150,7 +148,6 @@ void TestPerfCntrReadWrite::CountEvents(uint32_t dv_ind, CHK_ERR_ASRT(ret) } -static const uint64_t kGigByte = 1073741824; // 1024^3 static const uint64_t kGig = 1000000000; static const uint64_t kVg20Level1Bandwidth = 23; // 23 GB/sec @@ -162,9 +159,6 @@ TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) { rsmi_counter_value_t val; uint64_t throughput; - std::cout << "Test events sequentially (device " << - dv_ind << ")" << std::endl; - auto utiliz = [&](rsmi_event_type_t evt, uint32_t chan) { IF_VERB(STANDARD) { std::cout << "****************************" << std::endl; @@ -180,7 +174,7 @@ 
TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) { std::cout << "\t\tPass " << i << ":" << std::endl; CountEvents(dv_ind, evt, &val, 1); - double coll_time_sec = static_cast(val.time_running)/kGig; + double coll_time_sec = static_cast(val.time_running)/kGig; throughput = (val.value * 32)/coll_time_sec; std::cout << "\t\t\tCollected events for " << coll_time_sec << " seconds" << std::endl; @@ -188,13 +182,15 @@ TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) { std::cout << "\t\t\tXGMI throughput: " << throughput << " bytes/second" << std::endl; std::cout << "\t\t\tXGMI Channel Utilization: " << - 100*throughput/static_cast(kVg20Level1Bandwidth*kGigByte) << + 100*throughput/static_cast(kVg20Level1Bandwidth*kGig) << "%" << std::endl; std::cout << "\t\t\t****" << std::endl; } set_verbosity(tmp_verbosity); }; + utiliz(RSMI_EVNT_XGMI_1_BEATS_TX, 1); + utiliz(RSMI_EVNT_XGMI_0_BEATS_TX, 0); IF_VERB(STANDARD) { std::cout << "****************************" << std::endl; @@ -210,17 +206,7 @@ TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) { IF_VERB(STANDARD) { std::cout << "Testing Event Group " << grp.name() << std::endl; } - if (grp.group() == RSMI_EVNT_GRP_XGMI_DATA_OUT) { - utiliz(RSMI_EVNT_XGMI_DATA_OUT_0, 0); - utiliz(RSMI_EVNT_XGMI_DATA_OUT_1, 1); - utiliz(RSMI_EVNT_XGMI_DATA_OUT_2, 2); - utiliz(RSMI_EVNT_XGMI_DATA_OUT_3, 3); - utiliz(RSMI_EVNT_XGMI_DATA_OUT_4, 4); - utiliz(RSMI_EVNT_XGMI_DATA_OUT_5, 5); - } else if (grp.group() == RSMI_EVNT_GRP_XGMI) { - utiliz(RSMI_EVNT_XGMI_1_BEATS_TX, 1); - utiliz(RSMI_EVNT_XGMI_0_BEATS_TX, 0); - } + for (uint32_t evnt = grp.first_evt(); evnt <= grp.last_evt(); ++evnt) { IF_VERB(STANDARD) { std::cout << "\tTesting Event Type " << evnt << std::endl; @@ -232,21 +218,17 @@ TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) { void TestPerfCntrReadWrite::testEventsSimultaneously(uint32_t dv_ind) { + rsmi_event_handle_t evt_handle[RSMI_EVNT_XGMI_LAST - + RSMI_EVNT_XGMI_FIRST + 1]; 
rsmi_status_t ret; rsmi_counter_value_t val; uint32_t avail_counters; IF_VERB(STANDARD) { std::cout << "****************************" << std::endl; - std::cout << "Test events simultaneously (device " << - dv_ind << ")" << std::endl; + std::cout << "Test events simultaneously" << std::endl; std::cout << "****************************" << std::endl; } - - /* This code is a little convoluted. The reason is that it is meant to test - * having multiple events being used at one time, rather than sequentially - * handling 1 event at a time. - */ for (PerfCntrEvtGrp grp : s_event_groups) { ret = rsmi_dev_counter_group_supported(dv_ind, grp.group()); if (ret == RSMI_STATUS_NOT_SUPPORTED) { @@ -262,18 +244,13 @@ TestPerfCntrReadWrite::testEventsSimultaneously(uint32_t dv_ind) { } ret = rsmi_counter_available_counters_get(dv_ind, grp.group(), - &avail_counters); + &avail_counters); IF_VERB(STANDARD) { std::cout << "Available Counters: " << avail_counters << std::endl; } CHK_ERR_ASRT(ret) - std::shared_ptr evt_handle = - std::shared_ptr( - new rsmi_event_handle_t[avail_counters]); - - uint32_t tmp, j; - uint32_t num_created = 0; + uint32_t tmp; for (uint32_t evnt = grp.first_evt(); evnt <= grp.last_evt(); evnt += avail_counters) { @@ -283,31 +260,25 @@ TestPerfCntrReadWrite::testEventsSimultaneously(uint32_t dv_ind) { IF_VERB(STANDARD) { std::cout << "\tCreating events..." << std::endl; } - for (j = 0; j < avail_counters; ++j) { + for (uint32_t j = 0; j < avail_counters; ++j) { tmp = static_cast(evnt + j); - - if (tmp > grp.last_evt()) { - break; - } - IF_VERB(STANDARD) { std::cout << "\tEvent Type " << tmp << std::endl; } ret = rsmi_dev_counter_create(dv_ind, - static_cast(tmp), &evt_handle.get()[j]); + static_cast(tmp), &evt_handle[tmp]); CHK_ERR_ASRT(ret) } - num_created = j; + IF_VERB(STANDARD) { std::cout << "\tStart Counters..." 
<< std::endl; } uint32_t tmp_cntrs; - for (j = 0; j < num_created; ++j) { + for (uint32_t j = 0; j < avail_counters; ++j) { tmp = static_cast(evnt + j); - - ret = rsmi_counter_control(evt_handle.get()[j], RSMI_CNTR_CMD_START, + ret = rsmi_counter_control(evt_handle[tmp], RSMI_CNTR_CMD_START, nullptr); CHK_ERR_ASRT(ret) @@ -322,10 +293,9 @@ TestPerfCntrReadWrite::testEventsSimultaneously(uint32_t dv_ind) { IF_VERB(STANDARD) { std::cout << "\tRead Counters..." << std::endl; } - for (j = 0; j < num_created; ++j) { + for (uint32_t j = 0; j < avail_counters; ++j) { tmp = static_cast(evnt + j); - - ret = rsmi_counter_read(evt_handle.get()[j], &val); + ret = rsmi_counter_read(evt_handle[tmp], &val); CHK_ERR_ASRT(ret) IF_VERB(STANDARD) { @@ -336,8 +306,9 @@ TestPerfCntrReadWrite::testEventsSimultaneously(uint32_t dv_ind) { std::cout << "\t\tTime Running: " << val.time_running << std::endl; } } - for (j = 0; j < num_created; ++j) { - ret = rsmi_dev_counter_destroy(evt_handle.get()[j]); + for (uint32_t j = 0; j < avail_counters; ++j) { + tmp = static_cast(evnt + j); + ret = rsmi_dev_counter_destroy(evt_handle[tmp]); CHK_ERR_ASRT(ret) } } diff --git a/tests/rocm_smi_test/test_common.h b/tests/rocm_smi_test/test_common.h index 0b585a56ae..4b482c9c44 100755 --- a/tests/rocm_smi_test/test_common.h +++ b/tests/rocm_smi_test/test_common.h @@ -48,7 +48,6 @@ #include #include -#include #include "rocm_smi/rocm_smi.h"