APIs for the cache level and size

Read the cache level and size from the topology sysfs file.

Change-Id: Id3c558c95bcb79139a19e4adbaa7ff333d06098f
Tento commit je obsažen v:
Bill(Shuzhou) Liu
2023-10-03 11:25:33 -05:00
rodič 656f12e0f3
revize 1a233f93fb
7 změnil soubory, kde provedl 181 přidání a 0 odebrání
+11
Zobrazit soubor
@@ -306,6 +306,17 @@ int main() {
printf("\tVBios Version String: %s\n\n",
vbios_info.version);
// Get Cache info: query the cache table for this processor and print one
// line per reported entry.
amdsmi_gpu_cache_info_t cache_info = {};
ret = amdsmi_get_gpu_cache_info(processor_handles[j], &cache_info);
CHK_AMDSMI_RET(ret)
printf(" Output of amdsmi_get_gpu_cache_info:\n");
for (unsigned int i = 0 ; i < cache_info.num_cache_types; i++) {
    // cache_level and cache_size_kb are uint32_t, so they are printed with
    // an unsigned conversion rather than %d.
    printf("\tCache Level: %u, Cache Size: %u KB\n",
           cache_info.cache[i].cache_level,
           cache_info.cache[i].cache_size_kb);
}
// Get power measure
amdsmi_power_info_t power_measure = {};
ret = amdsmi_get_power_info(processor_handles[j], &power_measure);
+24
Zobrazit soubor
@@ -84,6 +84,7 @@ typedef enum {
#define AMDSMI_MAX_DRIVER_VERSION_LENGTH 80
#define AMDSMI_PRODUCT_NAME_LENGTH 128
#define AMDSMI_MAX_CONTAINER_TYPE 2
#define AMDSMI_MAX_CACHE_TYPES 10
#define AMDSMI_GPU_UUID_SIZE 38
@@ -415,6 +416,16 @@ typedef struct {
uint32_t reserved[16];
} amdsmi_vbios_info_t;
/*
 * GPU cache information: a table with one entry (level and size) per
 * reported cache, holding at most AMDSMI_MAX_CACHE_TYPES entries.
 */
typedef struct {
/* Number of valid entries at the start of cache[] */
uint32_t num_cache_types;
struct {
uint32_t cache_size_kb; /* In KB */
/* Cache level of this entry */
uint32_t cache_level;
uint32_t reserved[3];
} cache[AMDSMI_MAX_CACHE_TYPES];
uint32_t reserved[15];
} amdsmi_gpu_cache_info_t;
typedef struct {
uint8_t num_fw_info;
struct fw_info_list_ {
@@ -2158,6 +2169,19 @@ amdsmi_status_t amdsmi_get_temp_metric(amdsmi_processor_handle processor_handle
amdsmi_temperature_type_t sensor_type,
amdsmi_temperature_metric_t metric, int64_t *temperature);
/**
 * @brief Returns gpu cache info.
 *
 * @details Fills @p info with the number of reported caches and, for each
 * one, its cache level and its size in KB.
 *
 * @param[in] processor_handle PF of a processor for which to query
 *
 * @param[out] info pointer to the cache info struct to fill in.
 * Must be allocated by user and must not be null.
 *
 * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
 * (::AMDSMI_STATUS_INVAL when @p info is null)
 */
amdsmi_status_t amdsmi_get_gpu_cache_info(
amdsmi_processor_handle processor_handle, amdsmi_gpu_cache_info_t *info);
/**
* @brief Get the voltage metric value for the specified metric, from the
* specified voltage sensor on the specified device. It is not supported on
+32
Zobrazit soubor
@@ -845,6 +845,22 @@ typedef struct {
/// \cond Ignore in docs.
typedef rsmi_version_t rsmi_version;
/// \endcond
/**
 * @brief Maximum number of entries in the cache table of
 * ::rsmi_gpu_cache_info_t
 */
#define RSMI_MAX_CACHE_TYPES 10
/**
 * @brief This structure represents the cache size and level
 */
typedef struct {
/* Number of valid entries at the start of cache[] */
uint32_t num_cache_types;
struct {
uint32_t cache_size_kb; /* In KB */
/* Cache level of this entry */
uint32_t cache_level;
} cache[RSMI_MAX_CACHE_TYPES];
} rsmi_gpu_cache_info_t;
/// \cond Ignore in docs.
typedef rsmi_gpu_cache_info_t rsmi_gpu_cache_info;
/// \endcond
/**
* @brief This structure represents a range (e.g., frequencies or voltages).
*/
@@ -2035,6 +2051,22 @@ rsmi_status_t
rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type,
uint64_t *total);
/**
 * @brief Get gpu cache info.
 *
 * @details Given a device index @p dv_ind, and a pointer to a cache
 * info @p info, this function will write the cache size and level
 * to the location pointed to by @p info.
 *
 * @param[in] dv_ind a device index
 *
 * @param[out] info pointer to the cache info struct to fill in.
 * Must be allocated by user and must not be null.
 *
 * @return ::rsmi_status_t | ::RSMI_STATUS_SUCCESS on success, non-zero on fail
 * @retval ::RSMI_STATUS_INVALID_ARGS @p info is null
 * @retval ::RSMI_STATUS_NOT_SUPPORTED the cache information could not be
 * read for this device
 */
rsmi_status_t rsmi_dev_cache_info_get(
uint32_t dv_ind, rsmi_gpu_cache_info_t *info);
/**
* @brief Get the current memory usage
*
+3
Zobrazit soubor
@@ -84,6 +84,9 @@ class KFDNode {
int get_total_memory(uint64_t* total);
int get_used_memory(uint64_t* used);
// Get cache info from kfd.
// Reads the level and size of the caches listed for this node in the KFD
// topology and stores them in *info.
// Returns 0 on success and a non-zero value on failure (EINVAL when info
// is null).
int get_cache_info(rsmi_gpu_cache_info_t *info);
private:
uint32_t node_indx_;
uint32_t amdgpu_dev_index_;
+20
Zobrazit soubor
@@ -3282,6 +3282,26 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type,
return ret;
CATCH
}
// Public entry point for querying the cache table of device dv_ind.
// Rejects a null output pointer, resolves the device's KFD node and lets the
// node fill *info; any failure reported by the node is surfaced as
// RSMI_STATUS_NOT_SUPPORTED.
rsmi_status_t rsmi_dev_cache_info_get(
    uint32_t dv_ind, rsmi_gpu_cache_info_t *info) {
  TRY
  rsmi_status_t ret;
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << "| ======= start =======";
  LOG_TRACE(ss);

  // Validate the output pointer before touching any device state.
  if (info == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // Declares the device and `kfd_node` used below.
  GET_DEV_AND_KFDNODE_FROM_INDX

  // The node reports 0 on success; every other value maps to "not supported".
  const bool cache_read_ok = (kfd_node->get_cache_info(info) == 0);
  ret = cache_read_ok ? RSMI_STATUS_SUCCESS : RSMI_STATUS_NOT_SUPPORTED;
  return ret;
  CATCH
}
rsmi_status_t
rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type,
uint64_t *used) {
+64
Zobrazit soubor
@@ -130,6 +130,25 @@ static std::string KFDDevicePath(uint32_t dev_id) {
return node_path;
}
// A generic function to extract a property value from a text file made of
// "<name><value>" lines.
//
// The file is scanned line by line; for the first line that begins with
// property_name, the remainder of that line (everything after the matched
// prefix) is returned. Callers that want only the value therefore include
// the separator in the name, e.g. "level ".
//
// Returns an empty string if the file cannot be opened or no line starts
// with property_name.
static std::string get_properties_from_file(const std::string& file_name,
                                  const std::string& property_name) {
  std::ifstream infile(file_name);
  if (!infile) return "";

  std::string line;
  while (std::getline(infile, line)) {
    // Anchored prefix match: the property name must be at the beginning of
    // the line. Lines shorter than the name never match.
    if (line.compare(0, property_name.length(), property_name) == 0) {
      return line.substr(property_name.length());
    }
  }
  return "";
}
static int OpenKFDNodeFile(uint32_t dev_id, std::string node_file,
std::ifstream *fs) {
std::string line;
@@ -874,6 +893,51 @@ int KFDNode::get_used_memory(uint64_t* used) {
return 1;
}
// Collect the cache levels and sizes of this KFD node into *info.
//
// The number of caches is taken from the node's "caches_count" property.
// For each cache index the "level" and "size" entries of
//   <kKFDNodesPathRoot>/<node>/caches/<index>/properties
// are read. Every distinct non-negative level is recorded once, together
// with the size of the first cache found at that level; caches whose
// properties are missing or malformed, or whose size is not positive, are
// skipped.
//
// Returns 0 on success, EINVAL if info is null, the error from
// get_property_value() if "caches_count" cannot be read, and 1 if more than
// RSMI_MAX_CACHE_TYPES distinct levels are found (the entries collected so
// far remain in *info).
int KFDNode::get_cache_info(rsmi_gpu_cache_info_t *info) {
  if (info == nullptr) return EINVAL;

  uint64_t caches_count = 0;
  int ret = get_property_value("caches_count", &caches_count);
  if (ret != 0) return ret;

  // /sys/class/kfd/kfd/topology/nodes/1/caches/0/properties
  std::string f_path = kKFDNodesPathRoot;
  f_path += "/";
  f_path += std::to_string(node_indx_);
  f_path += "/";
  f_path += "caches/";

  info->num_cache_types = 0;
  // The index has the same width as caches_count so the comparison cannot
  // wrap around.
  for (uint64_t cache_id = 0; cache_id < caches_count; cache_id++) {
    const std::string prop_file =
        f_path + std::to_string(cache_id) + "/properties";
    const std::string level = get_properties_from_file(prop_file, "level ");
    try {
      const int cache_level = std::stoi(level);
      if (cache_level < 0) continue;

      // Only count each level once: compare against every entry recorded so
      // far, not just the first one.
      bool is_count_already = false;
      for (uint32_t i = 0; i < info->num_cache_types; i++) {
        if (info->cache[i].cache_level == static_cast<uint32_t>(cache_level)) {
          is_count_already = true;
          break;
        }
      }
      if (is_count_already) continue;

      // No room left for another distinct level.
      if (info->num_cache_types >= RSMI_MAX_CACHE_TYPES) return 1;

      const std::string size = get_properties_from_file(prop_file, "size ");
      const int cache_size = std::stoi(size);
      if (cache_size <= 0) continue;

      info->cache[info->num_cache_types].cache_level =
          static_cast<uint32_t>(cache_level);
      info->cache[info->num_cache_types].cache_size_kb =
          static_cast<uint32_t>(cache_size);
      info->num_cache_types++;
    } catch (...) {
      // Best effort: std::stoi throws when a property is missing or is not
      // a number; skip that cache and keep scanning the rest.
      continue;
    }
  }

  return 0;
}
// /sys/class/kfd/kfd/topology/nodes/*/properties
int read_node_properties(uint32_t node, std::string property_name,
uint64_t *val) {
+27
Zobrazit soubor
@@ -478,6 +478,33 @@ amdsmi_status_t amdsmi_get_gpu_board_info(amdsmi_processor_handle processor_hand
return AMDSMI_STATUS_SUCCESS;
}
// Query the cache table of the GPU behind processor_handle.
//
// The information is obtained through rsmi_dev_cache_info_get() and copied
// entry by entry (cache size in KB and cache level) into *info; the reserved
// fields of *info are not written.
//
// Returns AMDSMI_STATUS_SUCCESS on success, AMDSMI_STATUS_INVAL if info is
// null, or the status reported by the handle lookup or by the ROCm SMI call.
amdsmi_status_t amdsmi_get_gpu_cache_info(
      amdsmi_processor_handle processor_handle, amdsmi_gpu_cache_info_t *info) {
    AMDSMI_CHECK_INIT();
    if (info == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }

    // Looked up only to validate that the handle refers to a GPU device.
    amd::smi::AMDSmiGPUDevice* gpu_device = nullptr;
    amdsmi_status_t status = get_gpu_device_from_handle(
                                processor_handle, &gpu_device);
    if (status != AMDSMI_STATUS_SUCCESS)
        return status;

    // Value-initialized so no indeterminate data is ever read from it.
    rsmi_gpu_cache_info_t rsmi_info = {};
    status = rsmi_wrapper(rsmi_dev_cache_info_get,
                          processor_handle, &rsmi_info);
    if (status != AMDSMI_STATUS_SUCCESS)
        return status;

    // The two libraries define their table sizes independently; never copy
    // more entries than the destination table can hold.
    uint32_t num_entries = rsmi_info.num_cache_types;
    if (num_entries > AMDSMI_MAX_CACHE_TYPES) {
        num_entries = AMDSMI_MAX_CACHE_TYPES;
    }

    info->num_cache_types = num_entries;
    for (uint32_t i = 0; i < num_entries; i++) {
        info->cache[i].cache_size_kb = rsmi_info.cache[i].cache_size_kb;
        info->cache[i].cache_level = rsmi_info.cache[i].cache_level;
    }
    return AMDSMI_STATUS_SUCCESS;
}
amdsmi_status_t amdsmi_get_temp_metric(amdsmi_processor_handle processor_handle,
amdsmi_temperature_type_t sensor_type,
amdsmi_temperature_metric_t metric, int64_t *temperature) {