diff --git a/amdsmi_cli/README.md b/amdsmi_cli/README.md index 3621ca672c..4f1a9a5ad7 100644 --- a/amdsmi_cli/README.md +++ b/amdsmi_cli/README.md @@ -439,7 +439,6 @@ LIMIT: DRIVER: DRIVER_NAME: amdgpu DRIVER_VERSION: 6.1.10 - DRIVER_DATE: 2015/01/01 00:00 VRAM: VRAM_TYPE: GDDR6 VRAM_VENDOR: SAMSUNG @@ -448,6 +447,8 @@ CACHE: CACHE 0: CACHE_SIZE: 16 KB CACHE_LEVEL: 1 + MAX_NUM_CU_SHARED: 1 + NUM_CACHE_INSTANCE: 304 RAS: EEPROM_VERSION: N/A PARITY_SCHEMA: N/A diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 21145bc272..fd9a1bf856 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -431,8 +431,8 @@ class AMDSMICommands(): static_dict['limit'] = limit_info if args.driver: driver_info = {"driver_name" : "N/A", - "driver_version" : "N/A", - "driver_date" : "N/A"} + "driver_version" : "N/A" + } try: driver_info = amdsmi_interface.amdsmi_get_gpu_driver_info(args.gpu) @@ -477,12 +477,11 @@ class AMDSMICommands(): cache_info = amdsmi_interface.amdsmi_get_gpu_cache_info(args.gpu) for cache_key, cache_dict in cache_info.items(): for key, value in cache_dict.items(): - if key == 'cache_size' or key == 'cache_level': + if key == 'cache_size' or key == 'cache_level' or \ + key == 'max_num_cu_shared' or key == 'num_cache_instance': continue if value: cache_info[cache_key][key] = "ENABLED" - else: - cache_info[cache_key][key] = "DISABLED" if self.logger.is_human_readable_format(): for _ , cache_values in cache_info.items(): cache_values['cache_size'] = f"{cache_values['cache_size']} KB" diff --git a/example/amd_smi_drm_example.cc b/example/amd_smi_drm_example.cc index 7d046cf4c5..280011b0f7 100644 --- a/example/amd_smi_drm_example.cc +++ b/example/amd_smi_drm_example.cc @@ -316,6 +316,9 @@ int main() { cache_info.cache[i].cache_level, cache_info.cache[i].cache_size_kb, cache_info.cache[i].flags); + printf("\tMax number CU shared: %d, Number of instances: %d\n", + cache_info.cache[i].max_num_cu_shared, + cache_info.cache[i].num_cache_instance); } // Get power measure diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 96d7b66b39..6dff3c3306 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -475,6 +475,8 @@ typedef struct { uint32_t cache_size_kb; /* In KB */ uint32_t cache_level; uint32_t flags; // amdsmi_cache_flags_type_t which is a bitmask + uint32_t max_num_cu_shared; /* Indicates how many Compute Units share this cache instance */ + uint32_t num_cache_instance; /* total number of instance of this cache type */ uint32_t reserved[3]; } cache[AMDSMI_MAX_CACHE_TYPES]; uint32_t reserved[15]; diff --git a/py-interface/amdsmi_interface.py b/py-interface/amdsmi_interface.py index acd7ecb80b..dc4a92452e 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -1308,6 +1308,8 @@ def amdsmi_get_gpu_cache_info( for cache_index in range(cache_info.num_cache_types): cache_size = cache_info.cache[cache_index].cache_size_kb cache_level = cache_info.cache[cache_index].cache_level + max_num_cu_shared = cache_info.cache[cache_index].max_num_cu_shared + num_cache_instance = cache_info.cache[cache_index].num_cache_instance cache_flags = cache_info.cache[cache_index].flags data_cache = bool(cache_flags & amdsmi_wrapper.CACHE_FLAGS_DATA_CACHE) inst_cache = bool(cache_flags & amdsmi_wrapper.CACHE_FLAGS_INST_CACHE) @@ -1315,10 +1317,13 @@ def amdsmi_get_gpu_cache_info( simd_cache = bool(cache_flags & amdsmi_wrapper.CACHE_FLAGS_SIMD_CACHE) cache_info_dict[f"cache {cache_index}"] = {"cache_size": cache_size, "cache_level": cache_level, - "data_cache": data_cache, - "instruction_cache": inst_cache, - "cpu_cache": cpu_cache, - "simd_cache": simd_cache} + "max_num_cu_shared": max_num_cu_shared, + "num_cache_instance": num_cache_instance} + if (data_cache): cache_info_dict[f"cache {cache_index}"]["data_cache"] = data_cache + if (inst_cache): cache_info_dict[f"cache {cache_index}"]["inst_cache"] = inst_cache + if (cpu_cache): cache_info_dict[f"cache {cache_index}"]["cpu_cache"] = cpu_cache + if (simd_cache): cache_info_dict[f"cache {cache_index}"]["simd_cache"] = simd_cache + if cache_info_dict == {}: raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_NO_DATA) @@ -1624,8 +1629,7 @@ def amdsmi_get_gpu_driver_info( return { "driver_name": info.driver_name.decode("utf-8"), - "driver_version": info.driver_version.decode("utf-8"), - "driver_date": info.driver_date.decode("utf-8") + "driver_version": info.driver_version.decode("utf-8") } diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 6461b3a0d3..dccdcd81f2 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -773,6 +773,8 @@ struct_cache_._fields_ = [ ('cache_size_kb', ctypes.c_uint32), ('cache_level', ctypes.c_uint32), ('flags', ctypes.c_uint32), + ('max_num_cu_shared', ctypes.c_uint32), + ('num_cache_instance', ctypes.c_uint32), ('reserved', ctypes.c_uint32 * 3), ] diff --git a/rocm_smi/include/rocm_smi/rocm_smi.h b/rocm_smi/include/rocm_smi/rocm_smi.h index 8440175820..0236803e1e 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/rocm_smi/include/rocm_smi/rocm_smi.h @@ -876,6 +876,8 @@ typedef struct { so HSA_CACHE_TYPE_DATA|HSA_CACHE_TYPE_HSACU == 9 */ uint32_t flags; + uint32_t max_num_cu_shared; /* Indicates how many Compute Units share this cache instance */ + uint32_t num_cache_instance; /* total number of instance of this cache type */ } cache[RSMI_MAX_CACHE_TYPES]; } rsmi_gpu_cache_info_t; /// \cond Ignore in docs. diff --git a/rocm_smi/src/rocm_smi_kfd.cc b/rocm_smi/src/rocm_smi_kfd.cc index 2c74b289e8..fb2c215722 100755 --- a/rocm_smi/src/rocm_smi_kfd.cc +++ b/rocm_smi/src/rocm_smi_kfd.cc @@ -930,17 +930,27 @@ int KFDNode::get_cache_info(rsmi_gpu_cache_info_t *info) { int cache_type = std::stoi(type); if (cache_type <= 0) continue; - // only count once + // num_cu_shared – this can be fetched by counting the number of 1’s in the sibling_map. + std::string sibling_map = + get_properties_from_file(prop_file, "sibling_map "); + uint32_t num_cu_shared = + std::count(sibling_map.begin(), sibling_map.end(), '1'); + + // known cache type bool is_count_already = false; for (unsigned int i=0; i < info->num_cache_types; i++) { if (info->cache[i].cache_level == static_cast(cache_level) && info->cache[i].flags == static_cast(cache_type)) { is_count_already = true; + if (info->cache[i].max_num_cu_shared < num_cu_shared) + info->cache[i].max_num_cu_shared = num_cu_shared; + info->cache[i].num_cache_instance++; break; } } if (is_count_already) continue; + // new cache type if (info->num_cache_types >= RSMI_MAX_CACHE_TYPES) return 1; std::string size = get_properties_from_file(prop_file, "size "); int cache_size = std::stoi(size); @@ -948,6 +958,8 @@ int KFDNode::get_cache_info(rsmi_gpu_cache_info_t *info) { info->cache[info->num_cache_types].cache_level = cache_level; info->cache[info->num_cache_types].cache_size_kb = cache_size; + info->cache[info->num_cache_types].max_num_cu_shared = num_cu_shared; + info->cache[info->num_cache_types].num_cache_instance = 1; info->cache[info->num_cache_types].flags = cache_type; info->num_cache_types++; } catch (...) { diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index ba4c8091b3..917784d582 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -520,6 +520,8 @@ amdsmi_status_t amdsmi_get_gpu_cache_info( for (unsigned int i =0; i < rsmi_info.num_cache_types; i++) { info->cache[i].cache_size_kb = rsmi_info.cache[i].cache_size_kb; info->cache[i].cache_level = rsmi_info.cache[i].cache_level; + info->cache[i].max_num_cu_shared = rsmi_info.cache[i].max_num_cu_shared; + info->cache[i].num_cache_instance = rsmi_info.cache[i].num_cache_instance; // convert from sysfs type to CRAT type(HSA Cache Affinity type) info->cache[i].flags = 0; if (rsmi_info.cache[i].flags & HSA_CACHE_TYPE_DATA)