From c56ac1825e4570906f4baaf1cbfd4a5de29cdd5f Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Fri, 16 Nov 2018 15:55:38 -0600 Subject: [PATCH] Add get and set routines for PCIe bandwidth [ROCm/rocm_smi_lib commit: 9c897ab86dbb69ffd140c21ab3509c6d6f04e1a7] --- .../rocm-smi-lib/include/rocm_smi/rocm_smi.h | 66 +++++++++- .../include/rocm_smi/rocm_smi_device.h | 1 + projects/rocm-smi-lib/src/rocm_smi.cc | 123 ++++++++++++++---- projects/rocm-smi-lib/src/rocm_smi_device.cc | 5 + .../rocm_smi_test/functional/rsmi_sanity.cc | 70 +++++++++- 5 files changed, 236 insertions(+), 29 deletions(-) diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index 28dcb053a6..476134b09e 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -124,7 +124,6 @@ typedef enum { RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS, RSMI_CLK_TYPE_MEM, //!< Memory clock - RSMI_CLK_TYPE_LAST = RSMI_CLK_TYPE_MEM } rsmi_clk_type; @@ -239,6 +238,24 @@ typedef struct { uint64_t frequency[RSMI_MAX_NUM_FREQUENCIES]; } rsmi_frequencies; +/** + * @brief This structure holds information about the possible PCIe + * bandwidths. Specifically, the possible transfer rates and their + * associated numbers of lanes are stored here. + */ +typedef struct { + /** + * Transfer rates (T/s) that are possible + */ + rsmi_frequencies transfer_rate; + + /** + * List of lanes for corresponding transfer rate. + * Only the first num_supported bandwidths are valid. + */ + uint32_t lanes[RSMI_MAX_NUM_FREQUENCIES]; +} rsmi_pcie_bandwidth; + /** * @brief Initialize Rocm SMI. @@ -274,6 +291,51 @@ rsmi_status_t rsmi_shut_down(void); */ rsmi_status_t rsmi_num_monitor_devices(uint32_t *num_devices); +/** + * @brief Get the list of possible pci bandwidths that are available. 
+ * + * @details Given a device index @p dv_ind and a pointer to an + * rsmi_pcie_bandwidth structure @p bandwidth, this function will fill in + * @p bandwidth with the possible T/s values and associated number of lanes, + * and an indication of the current selection. + * + * @param[in] dv_ind a device index + * + * @param[inout] bandwidth a pointer to a caller provided rsmi_pcie_bandwidth + * structure to which the bandwidth information will be written + * + * @retval RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t +rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth *bandwidth); + +/** + * @brief Control the set of allowed PCIe bandwidths that can be used. + * + * @details Given a device index @p dv_ind and a 64 bit bitmask @p bw_bitmask, + * this function will limit the set of allowable bandwidths. If a bit in @p + * bw_bitmask has a value of 1, then the bandwidth (as ordered in an + * rsmi_pcie_bandwidth returned by rsmi_dev_pci_bandwidth_get()) corresponding + * to that bit index will be allowed. + * + * This function will change the performance level to + * ::RSMI_DEV_PERF_LEVEL_MANUAL in order to modify the set of allowable + * bandwidths. Caller will need to set to ::RSMI_DEV_PERF_LEVEL_AUTO in order + * to get back to default state. + * + * All bits with indices greater than or equal to + * rsmi_pcie_bandwidth.transfer_rate.num_supported will be ignored. + * + * @param[in] dv_ind a device index + * + * @param[in] bw_bitmask A bitmask indicating the indices of the + * bandwidths that are to be enabled (1) and disabled (0). Only the lowest + * rsmi_pcie_bandwidth.transfer_rate.num_supported bits of this mask are + * relevant. + */ +rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask); + /** * @brief Get the unique PCI device identifier associated for a device * @@ -436,7 +498,7 @@ rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, * specified clock. 
* * @details Given a device index @p dv_ind, a clock type @p clk_type, and a - * 32 bit bitmask @p freq_bitmask, this function will limit the set of + * 64 bit bitmask @p freq_bitmask, this function will limit the set of * allowable frequencies. If a bit in @p freq_bitmask has a value of 1, then * the frequency (as ordered in an rsmi_frequencies returned by * rsmi_dev_get_gpu_clk_freq()) corresponding to that bit index will be diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h index b306b54606..61f7774660 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h @@ -64,6 +64,7 @@ enum DevInfoTypes { kDevDevID, kDevGPUMClk, kDevGPUSClk, + kDevPCIEBW, kDevPowerProfileMode, kDevUsage, }; diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index 6d7e7ff658..1f210d8a3a 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -108,15 +108,17 @@ static rsmi_status_t errno_to_rsmi_status(uint32_t err) { } } /** - * Parse a string of the form ": <|*>" + * Parse a string of the form: + * ": <|*>" */ -static uint32_t freq_string_to_int(std::string freq_line, bool *is_curr) { +static uint64_t freq_string_to_int(const std::vector &freq_lines, + bool *is_curr, uint32_t lanes[], int i) { assert(is_curr != nullptr); - std::istringstream fs(freq_line); + std::istringstream fs(freq_lines[i]); uint32_t ind; - uint32_t freq; + float freq; std::string junk; std::string units_str; std::string star_str; @@ -128,7 +130,7 @@ static uint32_t freq_string_to_int(std::string freq_line, bool *is_curr) { fs >> star_str; if (is_curr != nullptr) { - if (freq_line.find("*") != std::string::npos) { + if (freq_lines[i].find("*") != std::string::npos) { *is_curr = true; } else { *is_curr = false; @@ -136,18 +138,33 @@ static uint32_t freq_string_to_int(std::string 
freq_line, bool *is_curr) { } uint32_t multiplier = 0; - if (units_str == "Mhz") { - multiplier = 1000000; - } else if (units_str == "Ghz") { - multiplier = 1000000000; - } else if (units_str == "Khz") { - multiplier = 1000; - } else if (units_str == "Hz") { - multiplier = 1; - } else { - assert(!"Unexpected units for frequency"); + switch (units_str[0]) { + case 'G': // GT or GHz + multiplier = 1000000000; + break; + + case 'M': // MT or MHz + multiplier = 1000000; + break; + + case 'K': // KT or KHz + multiplier = 1000; + break; + + case 'T': // Transactions + case 'H': // Hertz + multiplier = 1; + break; + default: + assert(!"Unexpected units for frequency"); } + if (star_str[0] == 'x') { + assert(lanes != nullptr && "Lanes are provided but null lanes pointer"); + if (lanes) { + lanes[i] = std::stoi(star_str.substr(1), nullptr); + } + } return freq*multiplier; } @@ -431,7 +448,7 @@ rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level perf_level) { } static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, - uint32_t dv_ind, rsmi_frequencies *f) { + uint32_t dv_ind, rsmi_frequencies *f, uint32_t *lanes = nullptr) { TRY std::vector val_vec; rsmi_status_t ret; @@ -451,7 +468,7 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, f->current = RSMI_MAX_NUM_FREQUENCIES + 1; // init to an invalid value for (uint32_t i = 0; i < f->num_supported; ++i) { - f->frequency[i] = freq_string_to_int(val_vec[i], &current); + f->frequency[i] = freq_string_to_int(val_vec, &current, lanes, i); // Our assumption is that frequencies are read in from lowest to highest. // Check that that is true. 
@@ -666,6 +683,60 @@ rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) { CATCH } +rsmi_status_t +rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth *b) { + TRY + assert(b != nullptr); + + if (b == nullptr) { + return RSMI_STATUS_INVALID_ARGS; + } + + return get_frequencies(amd::smi::kDevPCIEBW, dv_ind, + &b->transfer_rate, b->lanes); + + CATCH +} + +rsmi_status_t +rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) { + rsmi_status_t ret; + rsmi_pcie_bandwidth bws; + + TRY + ret = rsmi_dev_pci_bandwidth_get(dv_ind, &bws); + + if (ret != RSMI_STATUS_SUCCESS) { + return ret; + } + + assert(bws.transfer_rate.num_supported <= RSMI_MAX_NUM_FREQUENCIES); + + amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance(); + + // Above call to rsmi_dev_pci_bandwidth_get() should have emitted an error + // if assert below is not true + assert(dv_ind < smi.monitor_devices().size()); + + std::string freq_enable_str = + bitfield_to_freq_string(bw_bitmask, bws.transfer_rate.num_supported); + + std::shared_ptr dev = smi.monitor_devices()[dv_ind]; + assert(dev != nullptr); + + ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); + if (ret != RSMI_STATUS_SUCCESS) { + return ret; + } + + uint32_t ret_i; + ret_i = dev->writeDevInfo(amd::smi::kDevPCIEBW, freq_enable_str); + + return errno_to_rsmi_status(ret_i); + + CATCH +} + rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_temperature_metric metric, int64_t *temperature) { @@ -1032,17 +1103,17 @@ rsmi_status_t rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent) { TRY std::string val_str; - rsmi_status_t ret = get_dev_value_str(amd::smi::kDevUsage, dv_ind, - &val_str); - if (ret != RSMI_STATUS_SUCCESS) { - return ret; - } + rsmi_status_t ret = get_dev_value_str(amd::smi::kDevUsage, dv_ind, + &val_str); + if (ret != RSMI_STATUS_SUCCESS) { + return ret; + } - errno = 0; - *busy_percent = strtoul(val_str.c_str(), nullptr, 10); - assert(errno == 0); 
+ errno = 0; + *busy_percent = strtoul(val_str.c_str(), nullptr, 10); + assert(errno == 0); - return RSMI_STATUS_SUCCESS; + return RSMI_STATUS_SUCCESS; CATCH } diff --git a/projects/rocm-smi-lib/src/rocm_smi_device.cc b/projects/rocm-smi-lib/src/rocm_smi_device.cc index 3ec5382850..98cbb48422 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_device.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_device.cc @@ -64,6 +64,7 @@ static const char *kDevDevIDFName = "device"; static const char *kDevOverDriveLevelFName = "pp_sclk_od"; static const char *kDevGPUSClkFName = "pp_dpm_sclk"; static const char *kDevGPUMClkFName = "pp_dpm_mclk"; +static const char *kDevGPUPCIEClkFname = "pp_dpm_pcie"; static const char *kDevPowerProfileModeFName = "pp_power_profile_mode"; static const char *kDevUsageFName = "gpu_busy_percent"; @@ -83,6 +84,7 @@ static const std::map kDevAttribNameMap = { {kDevDevID, kDevDevIDFName}, {kDevGPUMClk, kDevGPUMClkFName}, {kDevGPUSClk, kDevGPUSClkFName}, + {kDevPCIEBW, kDevGPUPCIEClkFname}, {kDevPowerProfileMode, kDevPowerProfileModeFName}, {kDevUsage, kDevUsageFName}, }; @@ -195,6 +197,7 @@ int Device::writeDevInfo(DevInfoTypes type, uint64_t val) { case kDevGPUMClk: // integer (index within num-freq range) case kDevGPUSClk: // integer (index within num-freq range) + case kDevPCIEBW: // integer (index within num-freq range) case kDevDevID: // string (read-only) default: break; @@ -207,6 +210,7 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) { switch (type) { case kDevGPUMClk: case kDevGPUSClk: + case kDevPCIEBW: return writeDevInfoStr(type, val); case kDevOverDriveLevel: @@ -276,6 +280,7 @@ int Device::readDevInfo(DevInfoTypes type, std::vector *val) { switch (type) { case kDevGPUMClk: case kDevGPUSClk: + case kDevPCIEBW: case kDevPowerProfileMode: return readDevInfoMultiLineStr(type, val); break; diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/rsmi_sanity.cc 
b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/rsmi_sanity.cc index d75f9bd5ff..d99980e472 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/rsmi_sanity.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/rsmi_sanity.cc @@ -451,10 +451,66 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) { return RSMI_STATUS_SUCCESS; } -static void print_frequencies(rsmi_frequencies *f) { +static rsmi_status_t test_set_pci_bw(uint32_t dv_ind) { + rsmi_status_t ret; + rsmi_pcie_bandwidth bw; + uint32_t freq_bitmask; + + print_test_header("PCIe Bandwidth Control", dv_ind); + + ret = rsmi_dev_pci_bandwidth_get(dv_ind, &bw); + CHK_ERR_RET(ret) + + IF_VERB(STANDARD) { + std::cout << "Initial PCIe is " << bw.transfer_rate.current << std::endl; + } + + // First set the bitmask to all supported bandwidths + freq_bitmask = ~(~0 << bw.transfer_rate.num_supported); + + // Then, set the bitmask to all bandwidths besides the initial BW + freq_bitmask ^= (1 << bw.transfer_rate.current); + + std::string freq_bm_str = + std::bitset(freq_bitmask).to_string(); + + freq_bm_str.erase(0, std::min(freq_bm_str.find_first_not_of('0'), + freq_bm_str.size()-1)); + + IF_VERB(STANDARD) { + std::cout << "Setting bandwidth mask to " << "0b" << freq_bm_str << + " ..." << std::endl; + } + ret = rsmi_dev_pci_bandwidth_set(dv_ind, freq_bitmask); + CHK_ERR_RET(ret) + + ret = rsmi_dev_pci_bandwidth_get(dv_ind, &bw); + CHK_ERR_RET(ret) + + IF_VERB(STANDARD) { + std::cout << "Bandwidth is now index " << bw.transfer_rate.current << + std::endl; + std::cout << "Resetting mask to all bandwidths." 
<< std::endl; + } + ret = rsmi_dev_pci_bandwidth_set(dv_ind, 0xFFFFFFFF); + CHK_ERR_RET(ret) + + ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); + CHK_ERR_RET(ret) + + return RSMI_STATUS_SUCCESS; +} + +static void print_frequencies(rsmi_frequencies *f, uint32_t *l=nullptr) { assert(f != nullptr); for (uint32_t j = 0; j < f->num_supported; ++j) { std::cout << "\t** " << j << ": " << f->frequency[j]; + if (l != nullptr) { + std::cout << "T/s; x" << l[j]; + } else { + std::cout << "Hz"; + } + if (j == f->current) { std::cout << " *"; } @@ -500,6 +556,7 @@ void TestSanity::Run(void) { uint32_t val_ui32; rsmi_dev_perf_level pfl; rsmi_frequencies f; + rsmi_pcie_bandwidth b; uint32_t num_monitor_devs = 0; @@ -542,6 +599,14 @@ void TestSanity::Run(void) { std::cout << f.num_supported << std::endl; print_frequencies(&f); } + err = rsmi_dev_pci_bandwidth_get(i, &b); + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << "\t**Supported PCIe bandwidths: "; + std::cout << b.transfer_rate.num_supported << std::endl; + print_frequencies(&b.transfer_rate, b.lanes); + } + err = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f); CHK_ERR_ASRT(err) IF_VERB(STANDARD) { @@ -689,6 +754,9 @@ void TestSanity::Run(void) { err = test_set_freq(i); CHK_RSMI_PERM_ERR(err) + err = test_set_pci_bw(i); + CHK_RSMI_PERM_ERR(err) + err = test_set_fan_speed(i); CHK_RSMI_PERM_ERR(err)