diff --git a/rocm_smi/src/rocm_smi_kfd.cc b/rocm_smi/src/rocm_smi_kfd.cc index bb2a1f3d80..8077c1e7ab 100644 --- a/rocm_smi/src/rocm_smi_kfd.cc +++ b/rocm_smi/src/rocm_smi_kfd.cc @@ -866,10 +866,10 @@ int KFDNode::get_cache_info(rsmi_gpu_cache_info_t *info) { info->num_cache_types = 0; for (unsigned int cache_id = 0; cache_id < caches_count; cache_id++) { const auto prop_file = f_path + std::to_string(cache_id) + "/properties"; - std::string level = get_properties_from_file(prop_file, "level "); try { + std::string level = get_properties_from_file(prop_file, "level "); int cache_level = std::stoi(level); - if (cache_level < 0 ) continue; + if (cache_level < 0) continue; std::string type = get_properties_from_file(prop_file, "type "); int cache_type = std::stoi(type); diff --git a/tests/amd_smi_test/functional/gpu_cache_read.cc b/tests/amd_smi_test/functional/gpu_cache_read.cc new file mode 100644 index 0000000000..f8797c834e --- /dev/null +++ b/tests/amd_smi_test/functional/gpu_cache_read.cc @@ -0,0 +1,133 @@ +/* + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "gpu_cache_read.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../test_common.h" +#include "amd_smi/amdsmi.h" +#include "amd_smi/impl/amd_smi_utils.h" +#include "gpu_metrics_read.h" +#include "rocm_smi/rocm_smi_utils.h" + +TestGPUCacheRead::TestGPUCacheRead() : TestBase() { + set_title("GPU Cache Read Test"); + set_description( + "This test verifies the GPU cache " + "read metrics using the AMD SMI library."); +} + +TestGPUCacheRead::~TestGPUCacheRead(void) { + // Nothing to clean up here: this destructor body is empty. +} + +void TestGPUCacheRead::SetUp() { + TestBase::SetUp(); + return; +} + +void TestGPUCacheRead::DisplayTestInfo(void) { TestBase::DisplayTestInfo(); } + +void TestGPUCacheRead::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestGPUCacheRead::Close() { + /** + * @brief Closes the TestGPUCacheRead test case. + * + * This function adds no cleanup steps of its own for the GPU cache read + * test. Its only action is to call TestBase::Close(), so the teardown that + * runs after the test is whatever the base class implements in its Close + * method. + */ + TestBase::Close(); + return; +} + +void TestGPUCacheRead::Run() { + /** + * @brief Runs the GPU cache read test. + * + * Calls TestBase::Run() first and returns early when setup_failed_ is set. + * Otherwise, for each monitored device, it calls + * amdsmi_get_gpu_cache_info(), passes the returned status to CHK_ERR_ASRT, + * and prints every reported cache entry to std::cout. + */ + amdsmi_status_t err; + + TestBase::Run(); + if (setup_failed_) { + std::cout << "** SetUp Failed for this test.
Skipping.**" << std::endl; + return; + } + + for (uint32_t i = 0; i < num_monitor_devs(); ++i) { + PrintDeviceHeader(processor_handles_[i]); + std::cout << "Device #" << std::to_string(i) << "\n"; + + IF_VERB(STANDARD) { + std::cout << "\n\n"; + std::cout << "\t**GPU CACHE INFO: Using static struct (Backwards Compatibility):\n"; + } + amdsmi_gpu_cache_info_t res = {}; + err = amdsmi_get_gpu_cache_info(processor_handles_[i], &res); + const char *status_string; + amdsmi_status_code_to_string(err, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_cache_info(): " << status_string << "\n"; + CHK_ERR_ASRT(err); + std::cout << "\t\tnum_cache_types: " << res.num_cache_types << "\n"; + for (unsigned int j = 0; j < res.num_cache_types; j++) { + std::cout << "\t\tCache Type " << j << ":\n"; + std::cout << "\t\t\tcache_level: " << res.cache[j].cache_level << "\n"; + std::cout << "\t\t\tcache_properties: (0x" << std::hex << res.cache[j].cache_properties + << std::dec << ") "; + + // Example string representation (adjust according to actual bit definitions) + std::string props_str; + uint32_t props = res.cache[j].cache_properties; + if (props & AMDSMI_CACHE_PROPERTY_DATA_CACHE) props_str += "Data Cache, "; + if (props & AMDSMI_CACHE_PROPERTY_INST_CACHE) props_str += "Instruction Cache, "; + if (props & AMDSMI_CACHE_PROPERTY_CPU_CACHE) props_str += "CPU Cache, "; + if (props & AMDSMI_CACHE_PROPERTY_SIMD_CACHE) props_str += "SIMD Cache, "; + if (!props_str.empty()) + props_str.erase(props_str.size() - 2); // Remove trailing comma and space + else + props_str = "None"; + std::cout << props_str << "\n"; + std::cout << "\t\t\tcache_size: " << res.cache[j].cache_size << " KB\n"; + std::cout << "\t\t\tmax_num_cu_shared: " << res.cache[j].max_num_cu_shared << "\n"; + std::cout << "\t\t\tnum_cache_instance: " << res.cache[j].num_cache_instance << "\n"; + } + } +} \ No newline at end of file diff --git a/tests/amd_smi_test/functional/gpu_cache_read.h
b/tests/amd_smi_test/functional/gpu_cache_read.h new file mode 100644 index 0000000000..fa24273244 --- /dev/null +++ b/tests/amd_smi_test/functional/gpu_cache_read.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_GPU_CACHE_READ_H_ +#define TESTS_AMD_SMI_TEST_FUNCTIONAL_GPU_CACHE_READ_H_ + +#include "../test_base.h" + +class TestGPUCacheRead : public TestBase { + public: + TestGPUCacheRead(); + // @Brief: Destructor for test case of TestGPUCacheRead + virtual ~TestGPUCacheRead(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrieve the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_GPU_CACHE_READ_H_ \ No newline at end of file diff --git a/tests/amd_smi_test/main.cc b/tests/amd_smi_test/main.cc index 92eb224230..5cab9dfa20 100644 --- a/tests/amd_smi_test/main.cc +++ b/tests/amd_smi_test/main.cc @@ -67,6 +67,7 @@ #include "functional/init_shutdown_refcount.h" #include "functional/memorypartition_read_write.h" #include "functional/computepartition_read_write.h" +#include "functional/gpu_cache_read.h" static AMDSMITstGlobals *sRSMIGlvalues = nullptr; @@ -281,6 +282,11 @@ TEST(amdsmitstReadWrite, TestEvtNotifReadWrite) { TestEvtNotifReadWrite tst; RunGenericTest(&tst); } + +TEST(amdsmitstReadOnly, TestGPUCacheRead) { + TestGPUCacheRead tst; + RunGenericTest(&tst); +} /* TEST(amdsmitstReadOnly, TestConcurrentInit) { TestConcurrentInit tst; diff --git a/tests/python_unittest/integration_test.py b/tests/python_unittest/integration_test.py index 024e51c27b..61148e0948 100755 --- a/tests/python_unittest/integration_test.py +++ b/tests/python_unittest/integration_test.py @@ -212,6 +212,51 @@ class TestAmdSmiPythonInterface(unittest.TestCase): print() self.tearDown() + @handle_exceptions + def test_gpu_cache_info(self): + self.setUp() + print("\n\n###Test amdsmi_interface.amdsmi_get_gpu_cache_info") + processors =
amdsmi.amdsmi_get_processor_handles() + self.assertGreaterEqual(len(processors), 1) + self.assertLessEqual(len(processors), 32) + for i in range(0, len(processors)): + print("\n\n###Test Processor {}, bdf: {}".format(i, amdsmi.amdsmi_get_gpu_device_bdf(processors[i]))) + print("\n###Test amdsmi_interface.amdsmi_get_gpu_cache_info \n") + try: + cache_info = amdsmi.amdsmi_interface.amdsmi_get_gpu_cache_info(processors[i]) + except Exception as e: + print(f" Exception in amdsmi_get_gpu_cache_info: {e}") + self.fail(f"Test failed due to exception: {e}") + if isinstance(cache_info, dict): + for key, value in cache_info.items(): + print(f" {key}: {value}") + for cache_entry in cache_info.get('cache', []): + self.assertIn('cache_size', cache_entry) + self.assertIn('cache_level', cache_entry) + self.assertIn('num_cache_instance', cache_entry) + self.assertIn('max_num_cu_shared', cache_entry) + else: + print(" cache_info: {}".format(cache_info)) + print() + self.tearDown() + + @handle_exceptions + def test_get_gpu_compute_partition(self): + processors = amdsmi.amdsmi_get_processor_handles() + self.assertGreater(len(processors), 0) + for i in range(0, len(processors)): + bdf = amdsmi.amdsmi_get_gpu_device_bdf(processors[i]) + try: + result = amdsmi.amdsmi_get_gpu_compute_partition(processors[i]) + self.assertIsInstance(result, str) + self.assertTrue(len(result) > 0) + print(f"\nCompute partition for handle {bdf}: {result}") + except Exception as e: + print(f"\nCompute partition not supported for handle {bdf}: {e}") + continue + print("All compute partitions returned as strings successfully (or not supported).") + self.tearDown() + def test_bdf_device_id(self): self.setUp() processors = amdsmi.amdsmi_get_processor_handles()