[SWDEV-531904] - Added GPU Cache Read Tests (#464)
New:
- gpu_cache_read.h and gpu_cache_read.cc: test that reads GPU cache info and asserts that its structure is valid.
Updated:
- integration_test.py: added test_gpu_cache_info(), which asserts that the returned structure is valid.
- test_get_gpu_compute_partition() now loops through all devices whether the check for a given device passes or fails.
Added:
- test_get_gpu_compute_partition_returns_string() in integration_test.py: this test displays the current compute partition for each BDF.
---------
Signed-off-by: Juan Castillo <juan.castillo@amd.com>
Signed-off-by: Castillo, Juan <Juan.Castillo@amd.com>
Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Tento commit je obsažen v:
@@ -866,10 +866,10 @@ int KFDNode::get_cache_info(rsmi_gpu_cache_info_t *info) {
|
||||
info->num_cache_types = 0;
|
||||
for (unsigned int cache_id = 0; cache_id < caches_count; cache_id++) {
|
||||
const auto prop_file = f_path + std::to_string(cache_id) + "/properties";
|
||||
std::string level = get_properties_from_file(prop_file, "level ");
|
||||
try {
|
||||
std::string level = get_properties_from_file(prop_file, "level ");
|
||||
int cache_level = std::stoi(level);
|
||||
if (cache_level < 0 ) continue;
|
||||
if (cache_level < 0) continue;
|
||||
|
||||
std::string type = get_properties_from_file(prop_file, "type ");
|
||||
int cache_type = std::stoi(type);
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "gpu_cache_read.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "../test_common.h"
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
#include "gpu_metrics_read.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
|
||||
// Constructs the test and registers its title and description with the
// TestBase bookkeeping.
TestGPUCacheRead::TestGPUCacheRead() : TestBase() {
  set_title("GPU Cache Read Test");
  set_description("This test verifies the GPU cache read metrics using the AMD SMI library.");
}
|
||||
|
||||
// This test holds no resources of its own, so the compiler-generated
// destructor body is sufficient.
TestGPUCacheRead::~TestGPUCacheRead() = default;
|
||||
|
||||
// Prepares the test by delegating entirely to TestBase::SetUp(); this test
// adds no setup steps of its own.
void TestGPUCacheRead::SetUp() { TestBase::SetUp(); }
|
||||
|
||||
// Shows the information about this test by forwarding to
// TestBase::DisplayTestInfo().
void TestGPUCacheRead::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}
|
||||
|
||||
// Reports the results by forwarding to TestBase::DisplayResults(); this test
// adds no result output of its own.
void TestGPUCacheRead::DisplayResults() const { TestBase::DisplayResults(); }
|
||||
|
||||
/**
 * @brief Closes the GPU cache read test.
 *
 * Overrides TestBase::Close(). This test has no cleanup of its own, so the
 * call is forwarded to TestBase::Close() to run the inherited cleanup.
 */
void TestGPUCacheRead::Close() { TestBase::Close(); }
|
||||
|
||||
void TestGPUCacheRead::Run() {
|
||||
/**
|
||||
* @brief Runs the GPU cache read test.
|
||||
*
|
||||
* This function overrides the Run method from the TestBase class.
|
||||
* It is responsible for executing the GPU cache read test using the
|
||||
* AMD SMI library. The function retrieves the GPU cache read metrics
|
||||
* and displays them.
|
||||
*/
|
||||
amdsmi_status_t err;
|
||||
|
||||
TestBase::Run();
|
||||
if (setup_failed_) {
|
||||
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
|
||||
PrintDeviceHeader(processor_handles_[i]);
|
||||
std::cout << "Device #" << std::to_string(i) << "\n";
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\n\n";
|
||||
std::cout << "\t**GPU CACHE INFO: Using static struct (Backwards Compatibility):\n";
|
||||
}
|
||||
amdsmi_gpu_cache_info_t res = {};
|
||||
err = amdsmi_get_gpu_cache_info(processor_handles_[i], &res);
|
||||
const char *status_string;
|
||||
amdsmi_status_code_to_string(err, &status_string);
|
||||
std::cout << "\t\t** amdsmi_get_gpu_cache_info(): " << status_string << "\n";
|
||||
CHK_ERR_ASRT(err);
|
||||
std::cout << "\t\tnum_cache_types: " << res.num_cache_types << "\n";
|
||||
for (unsigned int j = 0; j < res.num_cache_types; j++) {
|
||||
std::cout << "\t\tCache Type " << j << ":\n";
|
||||
std::cout << "\t\t\tcache_level: " << res.cache[j].cache_level << "\n";
|
||||
std::cout << "\t\t\tcache_properties: (0x" << std::hex << res.cache[j].cache_properties
|
||||
<< std::dec << ") ";
|
||||
|
||||
// Example string representation (adjust according to actual bit definitions)
|
||||
std::string props_str;
|
||||
uint32_t props = res.cache[j].cache_properties;
|
||||
if (props & AMDSMI_CACHE_PROPERTY_DATA_CACHE) props_str += "Data Cache, ";
|
||||
if (props & AMDSMI_CACHE_PROPERTY_INST_CACHE) props_str += "Instruction Cache, ";
|
||||
if (props & AMDSMI_CACHE_PROPERTY_CPU_CACHE) props_str += "CPU Cache, ";
|
||||
if (props & AMDSMI_CACHE_PROPERTY_SIMD_CACHE) props_str += "SIMD Cache, ";
|
||||
if (!props_str.empty())
|
||||
props_str.erase(props_str.size() - 2); // Remove trailing comma and space
|
||||
else
|
||||
props_str = "None";
|
||||
std::cout << props_str << "\n";
|
||||
std::cout << "\t\t\tcache_size: " << res.cache[j].cache_size << " KB\n";
|
||||
std::cout << "\t\t\tmax_num_cu_shared: " << res.cache[j].max_num_cu_shared << "\n";
|
||||
std::cout << "\t\t\tnum_cache_instance: " << res.cache[j].num_cache_instance << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_GPU_CACHE_READ_H_
|
||||
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_GPU_CACHE_READ_H_
|
||||
|
||||
#include "../test_base.h"
|
||||
|
||||
class TestGPUCacheRead : public TestBase {
|
||||
public:
|
||||
TestGPUCacheRead();
|
||||
// @Brief: Destructor for test case of TestGPUBusyRead
|
||||
virtual ~TestGPUCacheRead();
|
||||
|
||||
// @Brief: Setup the environment for measurement
|
||||
virtual void SetUp();
|
||||
|
||||
// @Brief: Core measurement execution
|
||||
virtual void Run();
|
||||
|
||||
// @Brief: Clean up and retrive the resource
|
||||
virtual void Close();
|
||||
|
||||
// @Brief: Display results
|
||||
virtual void DisplayResults() const;
|
||||
|
||||
// @Brief: Display information about what this test does
|
||||
virtual void DisplayTestInfo(void);
|
||||
};
|
||||
|
||||
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_GPU_CACHE_READ_H_
|
||||
@@ -67,6 +67,7 @@
|
||||
#include "functional/init_shutdown_refcount.h"
|
||||
#include "functional/memorypartition_read_write.h"
|
||||
#include "functional/computepartition_read_write.h"
|
||||
#include "functional/gpu_cache_read.h"
|
||||
|
||||
static AMDSMITstGlobals *sRSMIGlvalues = nullptr;
|
||||
|
||||
@@ -281,6 +282,11 @@ TEST(amdsmitstReadWrite, TestEvtNotifReadWrite) {
|
||||
TestEvtNotifReadWrite tst;
|
||||
RunGenericTest(&tst);
|
||||
}
|
||||
|
||||
// Registers the GPU cache read test in the amdsmitstReadOnly suite and runs it
// through the shared generic test driver.
TEST(amdsmitstReadOnly, TestGPUCacheRead) {
  TestGPUCacheRead cache_read_test;
  RunGenericTest(&cache_read_test);
}
|
||||
/*
|
||||
TEST(amdsmitstReadOnly, TestConcurrentInit) {
|
||||
TestConcurrentInit tst;
|
||||
|
||||
@@ -212,6 +212,51 @@ class TestAmdSmiPythonInterface(unittest.TestCase):
|
||||
print()
|
||||
self.tearDown()
|
||||
|
||||
@handle_exceptions
def test_gpu_cache_info(self):
    """Check that amdsmi_get_gpu_cache_info returns well-formed data.

    For every processor handle the call must succeed and return a dict
    with a 'cache' entry; each cache entry must carry the size, level,
    instance-count and CU-sharing fields. Any exception from the call
    fails the test.
    """
    self.setUp()
    print("\n\n###Test amdsmi_interface.amdsmi_get_gpu_cache_info")
    processors = amdsmi.amdsmi_get_processor_handles()
    self.assertGreaterEqual(len(processors), 1)
    self.assertLessEqual(len(processors), 32)
    required_keys = ('cache_size', 'cache_level', 'num_cache_instance', 'max_num_cu_shared')
    for i, processor in enumerate(processors):
        print("\n\n###Test Processor {}, bdf: {}".format(i, amdsmi.amdsmi_get_gpu_device_bdf(processor)))
        print("\n###Test amdsmi_interface.amdsmi_get_gpu_cache_info \n")
        try:
            cache_info = amdsmi.amdsmi_interface.amdsmi_get_gpu_cache_info(processor)
        except Exception as e:
            print(f" Exception in amdsmi_get_gpu_cache_info: {e}")
            self.fail(f"Test failed due to exception: {e}")
        # A result of the wrong type must fail the test instead of being
        # printed and silently accepted.
        self.assertIsInstance(cache_info, dict)
        for key, value in cache_info.items():
            print(f" {key}: {value}")
        # Require the 'cache' entry so the per-entry checks cannot pass
        # vacuously when it is missing.
        self.assertIn('cache', cache_info)
        for cache_entry in cache_info['cache']:
            for required_key in required_keys:
                self.assertIn(required_key, cache_entry)
    print()
    self.tearDown()
|
||||
|
||||
@handle_exceptions
def test_get_gpu_compute_partition(self):
    """Check the compute partition reported for every processor.

    For each processor handle, amdsmi_get_gpu_compute_partition must
    either raise (treated as "not supported" and skipped) or return a
    non-empty string. Each device is checked in its own subTest so a
    failure on one device is reported without hiding the others.
    """
    # NOTE(review): unlike neighbouring tests, this one calls tearDown()
    # without a matching setUp() - confirm that is intended.
    processors = amdsmi.amdsmi_get_processor_handles()
    self.assertGreater(len(processors), 0)
    for processor in processors:
        bdf = amdsmi.amdsmi_get_gpu_device_bdf(processor)
        with self.subTest(bdf=bdf):
            # Only the API call is guarded; the assertions stay outside the
            # try block so a failed assertion is not swallowed and
            # misreported as "not supported".
            try:
                result = amdsmi.amdsmi_get_gpu_compute_partition(processor)
            except Exception as e:
                print(f"\nCompute partition not supported for handle {bdf}: {e}")
                continue
            self.assertIsInstance(result, str)
            self.assertGreater(len(result), 0)
            print(f"\nCompute partition for handle {bdf}: {result}")
    print("All compute partitions returned as strings successfully (or not supported).")
    self.tearDown()
|
||||
|
||||
def test_bdf_device_id(self):
|
||||
self.setUp()
|
||||
processors = amdsmi.amdsmi_get_processor_handles()
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele