Port more rocm-smi functions to amd-smi

This ports the API support functions, performance counters, process
information, topology, and XGMI info.

Change-Id: I3350ec75fdd2ca1438e79134582ae83c49763056


[ROCm/amdsmi commit: 86017b799c]
This commit is contained in:
Bill(Shuzhou) Liu
2022-08-24 11:31:20 -05:00
والد 39c1c4334e
کامیت 236e4e2d3e
15فایلهای تغییر یافته به همراه1989 افزوده شده و 60 حذف شده
@@ -107,11 +107,26 @@ typedef enum device_type {
typedef enum amdsmi_status {
AMDSMI_STATUS_SUCCESS = 0, /**< Call succeeded */
AMDSMI_STATUS_INVAL, /**< Invalid parameters */
AMDSMI_STATUS_OUT_OF_RESOURCES, /**< Not enough memory */
AMDSMI_STATUS_NOT_SUPPORTED, /**< Command not supported */
AMDSMI_STATUS_FILE_ERROR, /**< Problem accessing a file. */
AMDSMI_STATUS_NO_PERM, /**< Permission Denied */
AMDSMI_STATUS_BUSY, /**< Device busy */
AMDSMI_STATUS_OUT_OF_RESOURCES, /**< Not enough memory */
AMDSMI_STATUS_INTERNAL_EXCEPTION, /**< An internal exception was caught */
AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS, /**< The provided input is out of allowable or safe range */
AMDSMI_STATUS_INIT_ERROR, /**< An error occurred when initializing internal data structures */
AMDSMI_STATUS_NOT_YET_IMPLEMENTED, /**< Not implemented yet */
AMDSMI_STATUS_NOT_FOUND, /**< Device Not found */
AMDSMI_STATUS_INSUFFICIENT_SIZE, /**< Not enough resources were available for the operation */
AMDSMI_STATUS_INTERRUPT, /**< An interrupt occurred during execution of function */
AMDSMI_STATUS_UNEXPECTED_SIZE, /**< An unexpected amount of data was read */
AMDSMI_STATUS_NO_DATA, /**< No data was found for a given input */
AMDSMI_STATUS_UNEXPECTED_DATA, /**< The data read or provided to function is not what was expected */
AMDSMI_STATUS_BUSY, /**< Device busy */
AMDSMI_STATUS_REFCOUNT_OVERFLOW, /**< An internal reference counter exceeded INT32_MAX */
AMDSMI_LIB_START = 1000,
AMDSMI_STATUS_FAIL_LOAD_MODULE = AMDSMI_LIB_START, //!< Fail to load lib
AMDSMI_STATUS_FAIL_LOAD_SYMBOL,
AMDSMI_STATUS_DRM_ERROR, //!< Error when call libdrm
AMDSMI_STATUS_IO, /**< I/O Error */
AMDSMI_STATUS_FAULT, /**< Bad address */
AMDSMI_STATUS_API_FAILED, /**< API call failed */
@@ -119,21 +134,6 @@ typedef enum amdsmi_status {
AMDSMI_STATUS_NO_SLOT, /**< No more free slot */
AMDSMI_STATUS_RETRY, /**< Retry operation */
AMDSMI_STATUS_NOT_INIT, /**< Device not initialized */
AMDSMI_STATUS_INTERNAL_EXCEPTION, /**< An internal exception was caught */
AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS, /**< The provided input is out of */
AMDSMI_STATUS_NOT_YET_IMPLEMENTED, /**< The requested function has not
yet been implemented in the
current system for the current devices */
AMDSMI_STATUS_INSUFFICIENT_SIZE, /**< Not enough resources were available for the operation */
AMDSMI_STATUS_INTERRUPT, /**< An interrupt occurred during execution of function */
AMDSMI_STATUS_UNEXPECTED_SIZE, /**< An unexpected amount of data was read */
AMDSMI_STATUS_NO_DATA, /**< No data was found for a given input */
AMDSMI_STATUS_UNEXPECTED_DATA, /**< The data read or provided to function is not what was expected */
AMDSMI_STATUS_REFCOUNT_OVERFLOW, /**< An internal reference counter exceeded INT32_MAX */
AMDSMI_LIB_START = 1000,
AMDSMI_STATUS_FAIL_LOAD_MODULE = AMDSMI_LIB_START, //!< Fail to load lib
AMDSMI_STATUS_FAIL_LOAD_SYMBOL,
AMDSMI_STATUS_DRM_ERROR, //!< Error when call libdrm
AMDSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} amdsmi_status_t;
@@ -64,14 +64,13 @@
// Translates a rocm-smi status code into the amd-smi status enum.
// RSMI_STATUS_NO_DATA is mapped by name; every other code is carried over
// by numeric value.
// TODO(bliu): One to one map to all status code
static amdsmi_status_t rsmi_to_amdsmi_status(rsmi_status_t status) {
  switch (status) {
    case RSMI_STATUS_NO_DATA:
      return AMDSMI_STATUS_NO_DATA;
    default:
      return static_cast<amdsmi_status_t>(status);
  }
}
template <typename F, typename ...Args>
amdsmi_status_t rsmi_wrapper(F && f,
amdsmi_device_handle device_handle, Args &&... args) {
if (device_handle == nullptr) return AMDSMI_STATUS_INVAL;
static amdsmi_status_t get_gpu_device_from_handle(amdsmi_device_handle device_handle,
amd::smi::AMDSmiGPUDevice** gpudevice) {
if (device_handle == nullptr || gpudevice == nullptr)
return AMDSMI_STATUS_INVAL;
amd::smi::AMDSmiDevice* device = nullptr;
amdsmi_status_t r = amd::smi::AMDSmiSystem::getInstance()
@@ -79,17 +78,27 @@ amdsmi_status_t rsmi_wrapper(F && f,
if (r != AMDSMI_STATUS_SUCCESS) return r;
if (device->get_device_type() == AMD_GPU) {
amd::smi::AMDSmiGPUDevice* gpu_device =
static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);
uint32_t gpu_index = gpu_device->get_gpu_id();
auto r = std::forward<F>(f)(gpu_index,
std::forward<Args>(args)...);
return rsmi_to_amdsmi_status(r);
*gpudevice = static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);
return AMDSMI_STATUS_SUCCESS;
}
return AMDSMI_STATUS_NOT_SUPPORTED;
}
// Invokes a rocm-smi function whose first parameter is a GPU index.
//
// The device handle is resolved with get_gpu_device_from_handle(); if that
// fails its status is returned unchanged. Otherwise `f` is called with the
// GPU index followed by the forwarded `args`, and its result is converted
// with rsmi_to_amdsmi_status().
template <typename F, typename ...Args>
amdsmi_status_t rsmi_wrapper(F && f,
    amdsmi_device_handle device_handle, Args &&... args) {
  amd::smi::AMDSmiGPUDevice* gpu = nullptr;
  const amdsmi_status_t lookup_status =
      get_gpu_device_from_handle(device_handle, &gpu);
  if (lookup_status != AMDSMI_STATUS_SUCCESS) {
    return lookup_status;
  }

  uint32_t index = gpu->get_gpu_id();
  return rsmi_to_amdsmi_status(
      std::forward<F>(f)(index, std::forward<Args>(args)...));
}
amdsmi_status_t
amdsmi_init(uint64_t flags) {
return amd::smi::AMDSmiSystem::getInstance().init(flags);
@@ -407,8 +416,9 @@ amdsmi_event_notification_get(int timeout_ms,
std::vector<rsmi_evt_notification_data_t> r_data(*num_elem);
rsmi_status_t r = rsmi_event_notification_get(
timeout_ms, num_elem, &r_data[0]);
if (r != RSMI_STATUS_SUCCESS)
if (r != RSMI_STATUS_SUCCESS) {
return rsmi_to_amdsmi_status(r);
}
// convert output
for (uint32_t i=0; i < *num_elem; i++) {
@@ -419,8 +429,7 @@ amdsmi_event_notification_get(int timeout_ms,
MAX_EVENT_NOTIFICATION_MSG_SIZE);
amdsmi_status_t r = amd::smi::AMDSmiSystem::getInstance()
.gpu_index_to_handle(rsmi_data.dv_ind, &(data[i].device_handle));
if (r != AMDSMI_STATUS_SUCCESS)
return r;
if (r != AMDSMI_STATUS_SUCCESS) return r;
}
return AMDSMI_STATUS_SUCCESS;
@@ -430,3 +439,212 @@ amdsmi_status_t amdsmi_event_notification_stop(
amdsmi_device_handle device_handle) {
return rsmi_wrapper(rsmi_event_notification_stop, device_handle);
}
// Forwards to rsmi_dev_counter_group_supported() for the GPU behind
// `device_handle`, passing `group` converted to the rocm-smi enum.
amdsmi_status_t amdsmi_dev_counter_group_supported(
    amdsmi_device_handle device_handle, amdsmi_event_group_t group) {
  rsmi_event_group_t rsmi_group = static_cast<rsmi_event_group_t>(group);
  return rsmi_wrapper(rsmi_dev_counter_group_supported, device_handle,
                      rsmi_group);
}
// Forwards to rsmi_dev_counter_create() for the GPU behind `device_handle`.
// `type` and `evnt_handle` are converted to their rocm-smi counterparts
// before the call.
amdsmi_status_t amdsmi_dev_counter_create(amdsmi_device_handle device_handle,
    amdsmi_event_type_t type, amdsmi_event_handle_t *evnt_handle) {
  rsmi_event_type_t rsmi_type = static_cast<rsmi_event_type_t>(type);
  rsmi_event_handle_t* rsmi_handle =
      static_cast<rsmi_event_handle_t*>(evnt_handle);
  return rsmi_wrapper(rsmi_dev_counter_create, device_handle,
                      rsmi_type, rsmi_handle);
}
// Forwards to rsmi_dev_counter_destroy() and converts the resulting status.
amdsmi_status_t amdsmi_dev_counter_destroy(amdsmi_event_handle_t evnt_handle) {
  const auto rsmi_handle = static_cast<rsmi_event_handle_t>(evnt_handle);
  return rsmi_to_amdsmi_status(rsmi_dev_counter_destroy(rsmi_handle));
}
// Forwards to rsmi_counter_control() with the handle and command converted
// to their rocm-smi types; `cmd_args` is passed through untouched.
amdsmi_status_t amdsmi_counter_control(amdsmi_event_handle_t evt_handle,
    amdsmi_counter_command_t cmd, void *cmd_args) {
  const auto rsmi_handle = static_cast<rsmi_event_handle_t>(evt_handle);
  const auto rsmi_cmd = static_cast<rsmi_counter_command_t>(cmd);
  return rsmi_to_amdsmi_status(
      rsmi_counter_control(rsmi_handle, rsmi_cmd, cmd_args));
}
// Forwards to rsmi_counter_read(). `value` is reinterpreted as a
// rsmi_counter_value_t pointer for the call.
amdsmi_status_t
amdsmi_counter_read(amdsmi_event_handle_t evt_handle,
                    amdsmi_counter_value_t *value) {
  const auto rsmi_handle = static_cast<rsmi_event_handle_t>(evt_handle);
  auto* rsmi_value = reinterpret_cast<rsmi_counter_value_t*>(value);
  return rsmi_to_amdsmi_status(rsmi_counter_read(rsmi_handle, rsmi_value));
}
// Forwards to rsmi_counter_available_counters_get() for the GPU behind
// `device_handle`, passing `grp` converted to the rocm-smi enum.
amdsmi_status_t
amdsmi_counter_available_counters_get(amdsmi_device_handle device_handle,
    amdsmi_event_group_t grp, uint32_t *available) {
  rsmi_event_group_t rsmi_group = static_cast<rsmi_event_group_t>(grp);
  return rsmi_wrapper(rsmi_counter_available_counters_get, device_handle,
                      rsmi_group, available);
}
// Forwards to rsmi_topo_get_numa_node_number() for the GPU behind
// `device_handle`; the result is written through `numa_node`.
amdsmi_status_t
amdsmi_topo_get_numa_node_number(amdsmi_device_handle device_handle,
                                 uint32_t *numa_node) {
  const amdsmi_status_t status =
      rsmi_wrapper(rsmi_topo_get_numa_node_number, device_handle, numa_node);
  return status;
}
// Resolves both handles to GPU devices and forwards their GPU ids to
// rsmi_topo_get_link_weight(). A failed handle lookup is returned as-is,
// source first, then destination.
amdsmi_status_t
amdsmi_topo_get_link_weight(amdsmi_device_handle device_handle_src,
                            amdsmi_device_handle device_handle_dst,
                            uint64_t *weight) {
  amd::smi::AMDSmiGPUDevice* source_gpu = nullptr;
  amd::smi::AMDSmiGPUDevice* target_gpu = nullptr;

  amdsmi_status_t status =
      get_gpu_device_from_handle(device_handle_src, &source_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  status = get_gpu_device_from_handle(device_handle_dst, &target_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  return rsmi_to_amdsmi_status(rsmi_topo_get_link_weight(
      source_gpu->get_gpu_id(), target_gpu->get_gpu_id(), weight));
}
// Resolves both handles to GPU devices and forwards their GPU ids to
// rsmi_minmax_bandwidth_get(). A failed handle lookup is returned as-is,
// source first, then destination.
amdsmi_status_t
amdsmi_minmax_bandwidth_get(amdsmi_device_handle device_handle_src,
                            amdsmi_device_handle device_handle_dst,
                            uint64_t *min_bandwidth, uint64_t *max_bandwidth) {
  amd::smi::AMDSmiGPUDevice* source_gpu = nullptr;
  amd::smi::AMDSmiGPUDevice* target_gpu = nullptr;

  amdsmi_status_t status =
      get_gpu_device_from_handle(device_handle_src, &source_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  status = get_gpu_device_from_handle(device_handle_dst, &target_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  return rsmi_to_amdsmi_status(rsmi_minmax_bandwidth_get(
      source_gpu->get_gpu_id(), target_gpu->get_gpu_id(),
      min_bandwidth, max_bandwidth));
}
// Resolves both handles to GPU devices and forwards their GPU ids to
// rsmi_topo_get_link_type(). `type` is reinterpreted as a RSMI_IO_LINK_TYPE
// pointer for the call. A failed handle lookup is returned as-is, source
// first, then destination.
amdsmi_status_t
amdsmi_topo_get_link_type(amdsmi_device_handle device_handle_src,
                          amdsmi_device_handle device_handle_dst,
                          uint64_t *hops, AMDSMI_IO_LINK_TYPE *type) {
  amd::smi::AMDSmiGPUDevice* source_gpu = nullptr;
  amd::smi::AMDSmiGPUDevice* target_gpu = nullptr;

  amdsmi_status_t status =
      get_gpu_device_from_handle(device_handle_src, &source_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  status = get_gpu_device_from_handle(device_handle_dst, &target_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  return rsmi_to_amdsmi_status(rsmi_topo_get_link_type(
      source_gpu->get_gpu_id(), target_gpu->get_gpu_id(),
      hops, reinterpret_cast<RSMI_IO_LINK_TYPE*>(type)));
}
// Resolves both handles to GPU devices and forwards their GPU ids to
// rsmi_is_P2P_accessible(). A failed handle lookup is returned as-is,
// source first, then destination.
amdsmi_status_t
amdsmi_is_P2P_accessible(amdsmi_device_handle device_handle_src,
                         amdsmi_device_handle device_handle_dst,
                         bool *accessible) {
  amd::smi::AMDSmiGPUDevice* source_gpu = nullptr;
  amd::smi::AMDSmiGPUDevice* target_gpu = nullptr;

  amdsmi_status_t status =
      get_gpu_device_from_handle(device_handle_src, &source_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  status = get_gpu_device_from_handle(device_handle_dst, &target_gpu);
  if (status != AMDSMI_STATUS_SUCCESS) {
    return status;
  }

  return rsmi_to_amdsmi_status(rsmi_is_P2P_accessible(
      source_gpu->get_gpu_id(), target_gpu->get_gpu_id(), accessible));
}
// TODO(bliu) : other xgmi related information
amdsmi_status
amdsmi_get_xgmi_info(amdsmi_device_handle device_handle, amdsmi_xgmi_info_t *info) {
if (info == nullptr)
return AMDSMI_STATUS_INVAL;
return rsmi_wrapper(rsmi_dev_xgmi_hive_id_get, device_handle,
&(info->xgmi_hive_id));
}
// Forwards to rsmi_dev_xgmi_error_status() for the GPU behind
// `device_handle`. `status` is reinterpreted as a rsmi_xgmi_status_t pointer
// for the call.
amdsmi_status_t
amdsmi_dev_xgmi_error_status(amdsmi_device_handle device_handle,
                             amdsmi_xgmi_status_t *status) {
  rsmi_xgmi_status_t* rsmi_status =
      reinterpret_cast<rsmi_xgmi_status_t*>(status);
  return rsmi_wrapper(rsmi_dev_xgmi_error_status, device_handle, rsmi_status);
}
// Forwards to rsmi_dev_xgmi_error_reset() for the GPU behind `device_handle`.
amdsmi_status_t
amdsmi_dev_xgmi_error_reset(amdsmi_device_handle device_handle) {
  const amdsmi_status_t status =
      rsmi_wrapper(rsmi_dev_xgmi_error_reset, device_handle);
  return status;
}
// Forwards to rsmi_dev_supported_func_iterator_open() for the GPU behind
// `device_handle`. Returns AMDSMI_STATUS_INVAL when `handle` is null.
amdsmi_status_t
amdsmi_dev_supported_func_iterator_open(amdsmi_device_handle device_handle,
                                        amdsmi_func_id_iter_handle_t *handle) {
  if (!handle) {
    return AMDSMI_STATUS_INVAL;
  }
  rsmi_func_id_iter_handle_t* rsmi_handle =
      reinterpret_cast<rsmi_func_id_iter_handle_t*>(handle);
  return rsmi_wrapper(rsmi_dev_supported_func_iterator_open, device_handle,
                      rsmi_handle);
}
// Forwards to rsmi_dev_supported_variant_iterator_open(), reinterpreting both
// iterator handles as their rocm-smi types. Returns AMDSMI_STATUS_INVAL when
// `var_iter` is null.
amdsmi_status_t
amdsmi_dev_supported_variant_iterator_open(amdsmi_func_id_iter_handle_t obj_h,
                                      amdsmi_func_id_iter_handle_t *var_iter) {
  if (!var_iter) {
    return AMDSMI_STATUS_INVAL;
  }
  return rsmi_to_amdsmi_status(rsmi_dev_supported_variant_iterator_open(
      reinterpret_cast<rsmi_func_id_iter_handle_t>(obj_h),
      reinterpret_cast<rsmi_func_id_iter_handle_t*>(var_iter)));
}
// Forwards to rsmi_func_iter_next() and converts the resulting status.
amdsmi_status_t
amdsmi_func_iter_next(amdsmi_func_id_iter_handle_t handle) {
  return rsmi_to_amdsmi_status(rsmi_func_iter_next(
      reinterpret_cast<rsmi_func_id_iter_handle_t>(handle)));
}
// Forwards to rsmi_dev_supported_func_iterator_close(). Returns
// AMDSMI_STATUS_INVAL when `handle` is null.
amdsmi_status_t
amdsmi_dev_supported_func_iterator_close(amdsmi_func_id_iter_handle_t *handle) {
  if (!handle) {
    return AMDSMI_STATUS_INVAL;
  }
  return rsmi_to_amdsmi_status(rsmi_dev_supported_func_iterator_close(
      reinterpret_cast<rsmi_func_id_iter_handle_t*>(handle)));
}
// Forwards to rsmi_func_iter_value_get(), reinterpreting the iterator handle
// and `value` as their rocm-smi types. Returns AMDSMI_STATUS_INVAL when
// `value` is null.
amdsmi_status_t
amdsmi_func_iter_value_get(amdsmi_func_id_iter_handle_t handle,
                           amdsmi_func_id_value_t *value) {
  if (!value) {
    return AMDSMI_STATUS_INVAL;
  }
  return rsmi_to_amdsmi_status(rsmi_func_iter_value_get(
      reinterpret_cast<rsmi_func_id_iter_handle_t>(handle),
      reinterpret_cast<rsmi_func_id_value_t*>(value)));
}
// Forwards to rsmi_compute_process_info_get(), reinterpreting `procs` as a
// rsmi_process_info_t pointer. Only `num_items` is null-checked here
// (AMDSMI_STATUS_INVAL); `procs` is passed through as given.
amdsmi_status_t
amdsmi_compute_process_info_get(amdsmi_process_info_t *procs,
                                uint32_t *num_items) {
  if (!num_items) {
    return AMDSMI_STATUS_INVAL;
  }
  rsmi_process_info_t* rsmi_procs =
      reinterpret_cast<rsmi_process_info_t*>(procs);
  return rsmi_to_amdsmi_status(
      rsmi_compute_process_info_get(rsmi_procs, num_items));
}
// Forwards to rsmi_compute_process_info_by_pid_get(), reinterpreting `proc`
// as a rsmi_process_info_t pointer. Returns AMDSMI_STATUS_INVAL when `proc`
// is null.
amdsmi_status_t
amdsmi_compute_process_info_by_pid_get(uint32_t pid,
                                       amdsmi_process_info_t *proc) {
  if (!proc) {
    return AMDSMI_STATUS_INVAL;
  }
  rsmi_process_info_t* rsmi_proc =
      reinterpret_cast<rsmi_process_info_t*>(proc);
  return rsmi_to_amdsmi_status(
      rsmi_compute_process_info_by_pid_get(pid, rsmi_proc));
}
// Forwards to rsmi_compute_process_gpus_get() and converts the resulting
// status.
//
// `num_devices` must be non-null (AMDSMI_STATUS_INVAL otherwise).
// `dv_indices` is deliberately NOT rejected when null: the rocm-smi function
// documents a null `dv_indices` as the way to ask how many devices the
// process is using, so the pointer is passed through and validated there.
// This also matches amdsmi_compute_process_info_get(), which lets its output
// array be null.
amdsmi_status_t
amdsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
                                uint32_t *num_devices) {
  if (num_devices == nullptr) {
    return AMDSMI_STATUS_INVAL;
  }
  auto r = rsmi_compute_process_gpus_get(pid, dv_indices, num_devices);
  return rsmi_to_amdsmi_status(r);
}
@@ -0,0 +1,66 @@
# Associative array mapping a device name to its gtest exclusion list.
declare -A FILTER

# FILTER is meant to be used with a negative gtest filter
# Designate the tests to be excluded from all test runs first,
# followed by tests that should be excluded by device.

# Permanent exclusions
# These tests are included for debugging, but are not executed in normal
# execution on any ASIC:
PERMANENT_BLACKLIST_ALL_ASICS=

# This is the temporary blacklist for all ASICs. This is to be used when a test
# is failing consistently
TEMPORARY_BLACKLIST_ALL_ASICS=

# Both expansions are quoted and the two tests are joined with `&&`: with
# unquoted expansions an empty variable vanishes from the `[` argument list,
# so the old `-a` form was parsed with the wrong number of operands.
#
# NOTE(review): the per-device entries append their test names directly after
# $BLACKLIST_ALL_ASICS, so a non-empty temporary list needs a trailing ':' to
# stay separated from them.
if [ -z "$PERMANENT_BLACKLIST_ALL_ASICS" ] && [ -z "$TEMPORARY_BLACKLIST_ALL_ASICS" ]; then
  BLACKLIST_ALL_ASICS=
else
  BLACKLIST_ALL_ASICS="${PERMANENT_BLACKLIST_ALL_ASICS}:${TEMPORARY_BLACKLIST_ALL_ASICS}"
fi
# Device specific blacklists
# Each entry is the all-ASIC list followed by a ':'-separated list of
# "<suite>.<test>" names to exclude for that device.

# vega10: only the all-ASIC exclusions.
FILTER[vega10]=\
$BLACKLIST_ALL_ASICS
# SWDEV-207510
# vega20: frequency read and process-info read tests are excluded.
FILTER[vega20]=\
$BLACKLIST_ALL_ASICS\
"amdsmitstReadOnly.TestFrequenciesRead:"\
"amdsmitstReadOnly.TestProcInfoRead"
# SWDEV-207510
# arcturus: frequency read/read-write and process-info read tests are excluded.
FILTER[arcturus]=\
$BLACKLIST_ALL_ASICS\
"amdsmitstReadOnly.TestFrequenciesRead:"\
"amdsmitstReadWrite.TestFrequenciesReadWrite:"\
"amdsmitstReadOnly.TestProcInfoRead"
# SWDEV-306889
# aldebaran: fan, voltage-curve, frequency, PCI and power tests are excluded.
FILTER[aldebaran]=\
$BLACKLIST_ALL_ASICS\
"amdsmitstReadOnly.FanRead:"\
"amdsmitstReadOnly.TestVoltCurvRead:"\
"amdsmitstReadOnly.TestFrequenciesRead:"\
"amdsmitstReadWrite.FanReadWrite:"\
"amdsmitstReadWrite.TestFrequenciesReadWrite:"\
"amdsmitstReadWrite.TestPciReadWrite:"\
"amdsmitstReadWrite.TestPowerReadWrite"
# SWDEV-319795
# sienna_cichlid: the performance-level read-write test is excluded.
FILTER[sienna_cichlid]=\
$BLACKLIST_ALL_ASICS\
"amdsmitstReadWrite.TestPerfLevelReadWrite"
# SWDEV-321166
# virtualization: overdrive, GPU-busy, fan, power and power-cap tests are
# excluded.
FILTER[virtualization]=\
$BLACKLIST_ALL_ASICS\
"amdsmitstReadOnly.TestOverdriveRead:"\
"amdsmitstReadOnly.TestGPUBusyRead:"\
"amdsmitstReadWrite.FanReadWrite:"\
"amdsmitstReadWrite.TestOverdriveReadWrite:"\
"amdsmitstReadWrite.TestPowerReadWrite:"\
"amdsmitstReadWrite.TestPowerCapReadWrite"
@@ -0,0 +1,188 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <stdint.h>
#include <stddef.h>
#include <iostream>
#include <string>
#include "gtest/gtest.h"
#include "amd_smi.h"
#include "amd_smi_test/functional/api_support_read.h"
#include "amd_smi_test/test_common.h"
#include "amd_smi_test/test_utils.h"
// Sets the title and description used for this test case.
TestAPISupportRead::TestAPISupportRead() : TestBase() {
  set_title("AMDSMI API Support Read Test");
  // Spelling of "correctly" fixed in the description text.
  set_description("This test verifies that the supported APIs are correctly "
                  "identified.");
}
// Nothing to release beyond what the base class handles.
TestAPISupportRead::~TestAPISupportRead() = default;
// Delegates all setup to the base class.
void TestAPISupportRead::SetUp() {
  TestBase::SetUp();
}
// Delegates to the base class to show the test information.
void TestAPISupportRead::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}
// Delegates to the base class to show the results.
void TestAPISupportRead::DisplayResults() const {
  TestBase::DisplayResults();
}
// Delegates teardown to the base class.
void TestAPISupportRead::Close() {
  // Per the original note: the base call closes handles opened by the
  // amdsmitst utility calls and invokes amdsmi_shut_down(), so any other
  // cleanup must come before it.
  TestBase::Close();
}
// Walks, for every monitored device, the three nested iterators exposed by
// the API: supported functions, each function's variants, and each variant's
// sub-variants. Every API status is checked with CHK_ERR_ASRT, except that
// AMDSMI_STATUS_NO_DATA is treated as "nothing (more) to iterate". Names and
// ids are printed only inside IF_VERB(STANDARD) blocks.
void TestAPISupportRead::Run(void) {
amdsmi_status_t err;
TestBase::Run();
// Bail out early when the base-class setup reported a failure.
if (setup_failed_) {
IF_VERB(STANDARD) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
}
return;
}
// iter_handle: function level; var_iter: variant level;
// sub_var_iter: sub-variant level.
amdsmi_func_id_iter_handle_t iter_handle, var_iter, sub_var_iter;
// Reused for the value read at every iterator level.
amdsmi_func_id_value_t value;
for (uint32_t x = 0; x < num_iterations(); ++x) {
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
IF_VERB(STANDARD) {
PrintDeviceHeader(device_handles_[i]);
std::cout << "Supported AMDSMI Functions:" << std::endl;
std::cout << "\tVariants (Monitors)" << std::endl;
}
// Level 1: open the iterator over the functions supported by device i.
err = amdsmi_dev_supported_func_iterator_open(device_handles_[i], &iter_handle);
CHK_ERR_ASRT(err)
while (1) {
err = amdsmi_func_iter_value_get(iter_handle, &value);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << "Function Name: " << value.name << std::endl;
}
// Level 2: variants of the current function. NO_DATA means there are
// none, in which case the whole variant section is skipped.
err = amdsmi_dev_supported_variant_iterator_open(iter_handle, &var_iter);
if (err != AMDSMI_STATUS_NO_DATA) {
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << "\tVariants/Monitors: ";
}
while (1) {
err = amdsmi_func_iter_value_get(var_iter, &value);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
if (value.id == AMDSMI_DEFAULT_VARIANT) {
std::cout << "Default Variant ";
} else {
std::cout << value.id;
}
std::cout << " (";
}
// Level 3: sub-variants of the current variant; NO_DATA again means
// there are none.
err =
amdsmi_dev_supported_variant_iterator_open(var_iter, &sub_var_iter);
if (err != AMDSMI_STATUS_NO_DATA) {
CHK_ERR_ASRT(err)
while (1) {
err = amdsmi_func_iter_value_get(sub_var_iter, &value);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
std::cout << value.id << ", ";
}
// NO_DATA from "next" marks the end of the sub-variant list.
err = amdsmi_func_iter_next(sub_var_iter);
if (err == AMDSMI_STATUS_NO_DATA) {
break;
}
CHK_ERR_ASRT(err)
}
err = amdsmi_dev_supported_func_iterator_close(&sub_var_iter);
CHK_ERR_ASRT(err)
}
IF_VERB(STANDARD) {
std::cout << "), ";
}
// NO_DATA from "next" marks the end of the variant list.
err = amdsmi_func_iter_next(var_iter);
if (err == AMDSMI_STATUS_NO_DATA) {
break;
}
CHK_ERR_ASRT(err)
}
IF_VERB(STANDARD) {
std::cout << std::endl;
}
err = amdsmi_dev_supported_func_iterator_close(&var_iter);
CHK_ERR_ASRT(err)
}
// NO_DATA from "next" marks the end of the function list.
err = amdsmi_func_iter_next(iter_handle);
if (err == AMDSMI_STATUS_NO_DATA) {
break;
}
CHK_ERR_ASRT(err)
}
// The function-level iterator is closed once per device.
err = amdsmi_dev_supported_func_iterator_close(&iter_handle);
CHK_ERR_ASRT(err)
}
}
}
@@ -0,0 +1,73 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_API_SUPPORT_READ_H_
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_API_SUPPORT_READ_H_
#include "amd_smi_test/test_base.h"
// Test case that enumerates the functions, variants and sub-variants the
// API reports as supported on each monitored device.
class TestAPISupportRead : public TestBase {
 public:
  // Builds the test case.
  TestAPISupportRead();

  // Tears the test case down.
  virtual ~TestAPISupportRead();

  // Prepares the environment before the test body runs.
  virtual void SetUp();

  // Executes the test body.
  virtual void Run();

  // Cleans up and releases the resources used by the test.
  virtual void Close();

  // Shows the results of the test.
  virtual void DisplayResults() const;

  // Shows information about what this test does.
  virtual void DisplayTestInfo();
};
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_API_SUPPORT_READ_H_
@@ -94,7 +94,8 @@ void TestEvtNotifReadWrite::Run(void) {
if (setup_failed_) {
IF_VERB(STANDARD) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
std::cout << "** SetUp Failed for this test. Skipping.**" <<
std::endl;
}
return;
}
@@ -112,7 +113,8 @@ void TestEvtNotifReadWrite::Run(void) {
if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
"Event notification is not supported for this driver version." << std::endl;
"Event notification is not supported for this driver version." <<
std::endl;
}
return;
}
@@ -121,8 +123,8 @@ void TestEvtNotifReadWrite::Run(void) {
ASSERT_EQ(ret, AMDSMI_STATUS_SUCCESS);
}
amdsmi_evt_notification_data_t data[10];
uint32_t num_elem = 10;
amdsmi_evt_notification_data_t data[num_elem];
bool read_again = false;
ret = amdsmi_event_notification_get(10000, &num_elem, data);
@@ -0,0 +1,306 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <stdint.h>
#include <stddef.h>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "amd_smi.h"
#include "amd_smi_test/functional/hw_topology_read.h"
#include "amd_smi_test/test_common.h"
// Properties recorded for one ordered (source GPU, destination GPU) pair.
struct gpu_link_t {
  std::string type;   // printable label for the link type
  uint64_t hops;      // hop count reported for the link
  uint64_t weight;    // weight reported for the link
  bool accessible;    // whether P2P access was reported as possible
};
// Sets the title and description used for this test case.
TestHWTopologyRead::TestHWTopologyRead() : TestBase() {
  static const char kTitle[] = "AMDSMI Hardware Topology Read Test";
  static const char kDescription[] =
      "This test verifies that Hardware Topology can be read properly.";

  set_title(kTitle);
  set_description(kDescription);
}
// Nothing to release beyond what the base class handles.
TestHWTopologyRead::~TestHWTopologyRead() = default;
// Delegates all setup to the base class.
void TestHWTopologyRead::SetUp() {
  TestBase::SetUp();
}
// Delegates to the base class to show the test information.
void TestHWTopologyRead::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}
// Delegates to the base class to show the results.
void TestHWTopologyRead::DisplayResults() const {
  TestBase::DisplayResults();
}
// Delegates teardown to the base class.
void TestHWTopologyRead::Close() {
  // Per the original note: the base call closes handles opened by the
  // amdsmitst utility calls and invokes amdsmi_shut_down(), so any other
  // cleanup must come before it.
  TestBase::Close();
}
void TestHWTopologyRead::Run(void) {
amdsmi_status_t err;
uint32_t i, j;
TestBase::Run();
if (setup_failed_) {
IF_VERB(STANDARD) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
}
return;
}
uint32_t num_devices = num_monitor_devs();
// gpu_link_t gpu_links[num_devices][num_devices];
std::vector<std::vector<gpu_link_t>> gpu_links(num_devices,
std::vector<gpu_link_t>(num_devices));
// uint32_t numa_numbers[num_devices];
std::vector<uint32_t> numa_numbers(num_devices);
for (uint32_t dv_ind = 0; dv_ind < num_devices; ++dv_ind) {
amdsmi_device_handle dev_handle = device_handles_[dv_ind];
err = amdsmi_topo_get_numa_node_number(dev_handle, &numa_numbers[dv_ind]);
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
"\t**Numa Node Number. read: Not supported on this machine" <<
std::endl;
return;
}
} else {
CHK_ERR_ASRT(err)
}
}
}
for (uint32_t dv_ind_src = 0; dv_ind_src < num_devices; dv_ind_src++) {
for (uint32_t dv_ind_dst = 0; dv_ind_dst < num_devices; dv_ind_dst++) {
if (dv_ind_src == dv_ind_dst) {
gpu_links[dv_ind_src][dv_ind_dst].type = "X";
gpu_links[dv_ind_src][dv_ind_dst].hops = 0;
gpu_links[dv_ind_src][dv_ind_dst].weight = 0;
gpu_links[dv_ind_src][dv_ind_dst].accessible = true;
} else {
AMDSMI_IO_LINK_TYPE type;
err = amdsmi_topo_get_link_type(device_handles_[dv_ind_src],
device_handles_[dv_ind_dst],
&gpu_links[dv_ind_src][dv_ind_dst].hops, &type);
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
"\t**Link Type. read: Not supported on this machine"
<< std::endl;
return;
}
} else {
CHK_ERR_ASRT(err)
}
} else {
switch (type) {
case AMDSMI_IOLINK_TYPE_PCIEXPRESS:
gpu_links[dv_ind_src][dv_ind_dst].type = "PCIE";
break;
case AMDSMI_IOLINK_TYPE_XGMI:
gpu_links[dv_ind_src][dv_ind_dst].type = "XGMI";
break;
default:
gpu_links[dv_ind_src][dv_ind_dst].type = "XXXX";
IF_VERB(STANDARD) {
std::cout << "\t**Invalid IO LINK type. type=" << type <<
std::endl;
}
}
}
err = amdsmi_topo_get_link_weight(device_handles_[dv_ind_src],
device_handles_[dv_ind_dst],
&gpu_links[dv_ind_src][dv_ind_dst].weight);
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
"\t**Link Weight. read: Not supported on this machine"
<< std::endl;
return;
}
} else {
CHK_ERR_ASRT(err)
}
}
err = amdsmi_is_P2P_accessible(device_handles_[dv_ind_src],
device_handles_[dv_ind_dst],
&gpu_links[dv_ind_src][dv_ind_dst].accessible);
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
"\t**P2P Access. check: Not supported on this machine"
<< std::endl;
return;
}
} else {
CHK_ERR_ASRT(err)
}
}
}
}
}
IF_NVERB(STANDARD) {
return;
}
std::cout << "**NUMA node number of GPUs**" << std::endl;
std::cout << std::setw(12) << std::left <<"GPU#";
std::cout <<"NUMA node number";
std::cout << std::endl;
for (i = 0; i < num_devices; ++i) {
std::cout << std::setw(12) << std::left << i;
std::cout << numa_numbers[i];
std::cout << std::endl;
}
std::cout << std::endl;
std::cout << std::endl;
std::string tmp;
std::cout << "**Type between two GPUs**" << std::endl;
std::cout << " ";
for (i = 0; i < num_devices; ++i) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(12) << std::left << tmp;
}
std::cout << std::endl;
for (i = 0; i < num_devices; i++) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(6) << std::left << tmp;
for (j = 0; j < num_devices; j++) {
if (i == j) {
std::cout << std::setw(12) << std::left << "X";
} else {
std::cout << std::setw(12) << std::left << gpu_links[i][j].type;
}
}
std::cout << std::endl;
}
std::cout << std::endl;
std::cout << "**Hops between two GPUs**" << std::endl;
std::cout << " ";
for (i = 0; i < num_devices; ++i) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(12) << std::left << tmp;
}
std::cout << std::endl;
for (i = 0; i < num_devices; i++) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(6) << std::left << tmp;
for (j = 0; j < num_devices; j++) {
if (i == j) {
std::cout << std::setw(12) << std::left << "X";
} else {
std::cout << std::setw(12) << std::left << gpu_links[i][j].hops;
}
}
std::cout << std::endl;
}
std::cout << std::endl;
std::cout << "**Weight between two GPUs**" << std::endl;
std::cout << " ";
for (i = 0; i < num_devices; ++i) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(12) << std::left << tmp;
}
std::cout << std::endl;
for (i = 0; i < num_devices; i++) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(6) << std::left << tmp;
for (j = 0; j < num_devices; j++) {
if (i == j) {
std::cout << std::setw(12) << std::left << "X";
} else {
std::cout << std::setw(12) << std::left << gpu_links[i][j].weight;
}
}
std::cout << std::endl;
}
std::cout << std::endl;
std::cout << "**Access between two GPUs**" << std::endl;
std::cout << " ";
for (i = 0; i < num_devices; ++i) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(12) << std::left << tmp;
}
std::cout << std::endl;
for (i = 0; i < num_devices; i++) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(6) << std::left << tmp;
for (j = 0; j < num_devices; j++) {
std::cout << std::boolalpha;
std::cout << std::setw(12) << std::left << gpu_links[i][j].accessible;
}
std::cout << std::endl;
}
std::cout << std::endl;
}
@@ -0,0 +1,73 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_HW_TOPOLOGY_READ_H_
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_HW_TOPOLOGY_READ_H_
#include "amd_smi_test/test_base.h"
// Test case class for the hardware topology read test. It plugs into the
// TestBase life cycle: SetUp() -> Run() -> DisplayResults() -> Close().
// NOTE(review): the call order is the usual TestBase convention; confirm
// against the test driver.
class TestHWTopologyRead : public TestBase {
 public:
  // Constructs the test case.
  TestHWTopologyRead();

  // Destroys the test case.
  virtual ~TestHWTopologyRead();

  // Prepares the environment the test needs.
  virtual void SetUp();

  // Executes the body of the test.
  virtual void Run();

  // Cleans up and releases the resources used by the test.
  virtual void Close();

  // Displays the results of the test.
  virtual void DisplayResults() const;

  // Displays information about what this test does.
  virtual void DisplayTestInfo();
};
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_HW_TOPOLOGY_READ_H_
@@ -0,0 +1,375 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <stdint.h>
#include <stddef.h>
#include <iostream>
#include <bitset>
#include <string>
#include <algorithm>
#include <vector>
#include <memory>
#include "gtest/gtest.h"
#include "amd_smi.h"
#include "amd_smi_test/functional/perf_cntr_read_write.h"
#include "amd_smi_test/test_common.h"
// Describes one performance-counter event group: its group id, the first and
// last event ids it contains (treated as an inclusive range, hence the +1 in
// the event count) and a printable name.
PerfCntrEvtGrp::PerfCntrEvtGrp(amdsmi_event_group_t grp, uint32_t first,
                               uint32_t last, std::string name)
    : grp_(grp),
      first_evt_(first),
      last_evt_(last),
      num_events_(last - first + 1),
      name_(name) {}
// Destructor; the body is empty, there is nothing to release by hand.
PerfCntrEvtGrp::~PerfCntrEvtGrp() {}
// Event groups exercised by the tests in this file; add new groups here.
//
// PC_EVT_GRP(SHORT_NAME, LABEL) expands to a PerfCntrEvtGrp built from the
// AMDSMI_EVNT_GRP_<SHORT_NAME> group id and the
// AMDSMI_EVNT_<SHORT_NAME>_FIRST / AMDSMI_EVNT_<SHORT_NAME>_LAST event ids,
// labelled with LABEL for printing.
#define PC_EVT_GRP(SHORT_NAME, LABEL)                                  \
  PerfCntrEvtGrp(AMDSMI_EVNT_GRP_##SHORT_NAME,                         \
                 AMDSMI_EVNT_##SHORT_NAME##_FIRST,                     \
                 AMDSMI_EVNT_##SHORT_NAME##_LAST, LABEL)

static const std::vector<PerfCntrEvtGrp> s_event_groups = {
    PC_EVT_GRP(XGMI, "XGMI"),
    PC_EVT_GRP(XGMI_DATA_OUT, "XGMI_DATA_OUT"),
};
// Constructs the test case and registers the title and description that
// identify it.
TestPerfCntrReadWrite::TestPerfCntrReadWrite() : TestBase() {
set_title("AMDSMI Performance Counter Read/Write Test");
set_description("The Performance counter tests verify that performance"
" counters can be controlled and read properly.");
}
// Destructor; the body is empty, this class releases nothing itself here.
TestPerfCntrReadWrite::~TestPerfCntrReadWrite(void) {
}
// Prepares the test; this test needs nothing beyond TestBase::SetUp().
void TestPerfCntrReadWrite::SetUp(void) {
  TestBase::SetUp();
}
// Displays information about this test by delegating to
// TestBase::DisplayTestInfo().
void TestPerfCntrReadWrite::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
// Displays the test results; nothing is added to TestBase::DisplayResults().
void TestPerfCntrReadWrite::DisplayResults(void) const {
  TestBase::DisplayResults();
}
// Tears the test down by delegating to TestBase::Close().
void TestPerfCntrReadWrite::Close() {
// This will close handles opened within amdsmitst utility calls and call
// amdsmi_shut_down(), so it should be done after any other cleanup.
TestBase::Close();
}
// Map an event-group short name (e.g. XGMI) to the first / last event
// enumerator of that group by token pasting:
//   AMDSMI_EVNT_ENUM_FIRST(XGMI) -> AMDSMI_EVNT_XGMI_FIRST
//   AMDSMI_EVNT_ENUM_LAST(XGMI)  -> AMDSMI_EVNT_XGMI_LAST
#define AMDSMI_EVNT_ENUM_FIRST(GROUP) AMDSMI_EVNT_##GROUP##_FIRST
#define AMDSMI_EVNT_ENUM_LAST(GROUP) AMDSMI_EVNT_##GROUP##_LAST
// Refactor this to handle different event groups once we have > 1 event group
// Creates a counter for event `evnt` on device `dv_ind`, starts it, sleeps
// for `sleep_sec` seconds, reads the counter into `*val` and destroys it.
// Along the way it checks that amdsmi_dev_counter_create() rejects a null
// handle pointer with AMDSMI_STATUS_INVAL.
//
// Throws AMDSMI_STATUS_NOT_SUPPORTED (an amdsmi_status_t) when
// amdsmi_counter_control() reports that the counter cannot be started.
// The counter is destroyed on every exit path, including that throw and any
// early return taken by a failed assertion.
void TestPerfCntrReadWrite::CountEvents(amdsmi_device_handle dv_ind,
    amdsmi_event_type_t evnt, amdsmi_counter_value_t *val, int32_t sleep_sec) {
  amdsmi_event_handle_t evt_handle;
  amdsmi_status_t ret;

  ret = amdsmi_dev_counter_create(dv_ind, evnt, &evt_handle);
  CHK_ERR_ASRT(ret)

  // Destroys the counter if we leave early; disarmed before the checked
  // destroy at the end of the function.
  struct CounterGuard {
    amdsmi_event_handle_t handle;
    bool armed;
    ~CounterGuard() {
      if (armed) {
        amdsmi_dev_counter_destroy(handle);
      }
    }
  } guard = {evt_handle, true};

  // Note that amdsmi_dev_counter_create() should never return
  // AMDSMI_STATUS_NOT_SUPPORTED. It will return AMDSMI_STATUS_OUT_OF_RESOURCES
  // if it is unable to create a counter.
  ret = amdsmi_dev_counter_create(dv_ind, evnt, nullptr);
  ASSERT_EQ(ret, AMDSMI_STATUS_INVAL);

  ret = amdsmi_counter_control(evt_handle, AMDSMI_CNTR_CMD_START, nullptr);
  if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
    std::cout << "amdsmi_counter_control() returned "
                 "AMDSMI_STATUS_NOT_SUPPORTED" << std::endl;
    throw AMDSMI_STATUS_NOT_SUPPORTED;
  } else {
    CHK_ERR_ASRT(ret)
  }

  // Let the counter accumulate events for the requested interval.
  sleep(sleep_sec);

  ret = amdsmi_counter_read(evt_handle, val);
  CHK_ERR_ASRT(ret)

  IF_VERB(STANDARD) {
    std::cout << "\t\t\tValue: " << val->value << std::endl;
    std::cout << "\t\t\tTime Enabled (nS): " << val->time_enabled << std::endl;
    std::cout << "\t\t\tTime Running (nS): " << val->time_running << std::endl;
    std::cout << "\t\t\tEvents/Second Running: " <<
        val->value/static_cast<float>(val->time_running) << std::endl;
  }

  // Normal path: destroy explicitly so the status can be checked.
  guard.armed = false;
  ret = amdsmi_dev_counter_destroy(evt_handle);
  CHK_ERR_ASRT(ret)
}
// Scaling factors used when reporting XGMI throughput and utilization.
static constexpr uint64_t kGigByte = 1024ULL * 1024ULL * 1024ULL;  // 1024^3
static constexpr uint64_t kGig = 1000ULL * 1000ULL * 1000ULL;      // 10^9
static constexpr uint64_t kVg20Level1Bandwidth = 23;               // 23 GB/sec
// Exercises the event groups listed in s_event_groups on device `dv_ind`, one
// event at a time. Groups reported as AMDSMI_STATUS_NOT_SUPPORTED are
// skipped. For the XGMI groups a link-utilization estimate is printed first,
// then every event id in [first_evt(), last_evt()] is counted once through
// CountEvents().
//
// Exceptions thrown by CountEvents() (AMDSMI_STATUS_NOT_SUPPORTED) propagate
// to the caller.
void
TestPerfCntrReadWrite::testEventsIndividually(amdsmi_device_handle dv_ind) {
  amdsmi_status_t ret;
  // Zero-initialised so the prints below never read indeterminate data if
  // CountEvents() returns early on a failed assertion.
  amdsmi_counter_value_t val = {};
  uint64_t throughput;

  std::cout << "Test events sequentially (device " <<
      dv_ind << ")" << std::endl;

  // Samples `evt` five times, one second per pass, and prints the derived
  // throughput and utilization for XGMI channel `chan`.
  auto utiliz = [&](amdsmi_event_type_t evt, uint32_t chan) {
    IF_VERB(STANDARD) {
      std::cout << "****************************" << std::endl;
      std::cout << "Test XGMI Link Utilization (channel " <<
          chan << ")" << std::endl;
      std::cout << "****************************" << std::endl;
      std::cout << "Assumed Level 1 Bandwidth: " <<
          kVg20Level1Bandwidth << "GB/sec" << std::endl;
    }

    // Silence CountEvents() while sampling; the previous verbosity is put
    // back even if CountEvents() throws.
    const uint32_t saved_verbosity = verbosity();
    set_verbosity(0);
    try {
      for (int i = 0; i < 5; ++i) {
        std::cout << "\t\tPass " << i << ":" << std::endl;
        CountEvents(dv_ind, evt, &val, 1);

        const double coll_time_sec =
            static_cast<double>(val.time_running) / kGig;
        // Each event is scaled by 32 to obtain bytes. Guard against a zero
        // running time: converting inf/NaN to an integer is undefined.
        throughput = coll_time_sec > 0.0 ?
            static_cast<uint64_t>((val.value * 32) / coll_time_sec) : 0;

        std::cout << "\t\t\tCollected events for " << coll_time_sec <<
            " seconds" << std::endl;
        std::cout << "\t\t\tEvents collected: " << val.value << std::endl;
        std::cout << "\t\t\tXGMI throughput: " << throughput <<
            " bytes/second" << std::endl;
        std::cout << "\t\t\tXGMI Channel Utilization: " <<
            100*throughput/static_cast<double>(kVg20Level1Bandwidth*kGigByte) <<
            "%" << std::endl;
        std::cout << "\t\t\t****" << std::endl;
      }
    } catch (...) {
      set_verbosity(saved_verbosity);
      throw;
    }
    set_verbosity(saved_verbosity);
  };

  IF_VERB(STANDARD) {
    std::cout << "****************************" << std::endl;
    std::cout << "Test each event individually" << std::endl;
    std::cout << "****************************" << std::endl;
  }

  for (const PerfCntrEvtGrp &grp : s_event_groups) {
    ret = amdsmi_dev_counter_group_supported(dv_ind, grp.group());
    if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
      continue;
    }
    IF_VERB(STANDARD) {
      std::cout << "Testing Event Group " << grp.name() << std::endl;
    }

    if (grp.group() == AMDSMI_EVNT_GRP_XGMI_DATA_OUT) {
      utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_0, 0);
      utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_1, 1);
      utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_2, 2);
      utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_3, 3);
      utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_4, 4);
      utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_5, 5);
    } else if (grp.group() == AMDSMI_EVNT_GRP_XGMI) {
      utiliz(AMDSMI_EVNT_XGMI_1_BEATS_TX, 1);
      utiliz(AMDSMI_EVNT_XGMI_0_BEATS_TX, 0);
    }

    for (uint32_t evnt = grp.first_evt(); evnt <= grp.last_evt(); ++evnt) {
      IF_VERB(STANDARD) {
        std::cout << "\tTesting Event Type " << evnt << std::endl;
      }
      CountEvents(dv_ind, static_cast<amdsmi_event_type_t>(evnt), &val);
    }
  }
}
// Exercises the event groups listed in s_event_groups on device `dv_ind`
// with several counters live at the same time. For each supported group the
// events are processed in batches of at most `avail_counters` (the number
// reported by amdsmi_counter_available_counters_get()): every counter in the
// batch is created, then all are started, then read after one second, then
// destroyed.
void
TestPerfCntrReadWrite::testEventsSimultaneously(amdsmi_device_handle dv_ind) {
  amdsmi_status_t ret;
  amdsmi_counter_value_t val;
  uint32_t avail_counters;

  IF_VERB(STANDARD) {
    std::cout << "****************************" << std::endl;
    std::cout << "Test events simultaneously (device " <<
        dv_ind << ")" << std::endl;
    std::cout << "****************************" << std::endl;
  }

  /* This code is a little convoluted. The reason is that it is meant to test
   * having multiple events being used at one time, rather than sequentially
   * handling 1 event at a time.
   */
  for (const PerfCntrEvtGrp &grp : s_event_groups) {
    ret = amdsmi_dev_counter_group_supported(dv_ind, grp.group());
    if (ret == AMDSMI_STATUS_NOT_SUPPORTED) {
      IF_VERB(STANDARD) {
        std::cout << "\tEvent Group " << grp.name() <<
            " is not supported. Skipping." << std::endl;
      }
      continue;
    }
    IF_VERB(STANDARD) {
      std::cout << "Testing Event Group " << grp.name() << std::endl;
    }

    ret = amdsmi_counter_available_counters_get(dv_ind, grp.group(),
                                                &avail_counters);
    // Check the status before using the value it is supposed to have written.
    CHK_ERR_ASRT(ret)
    IF_VERB(STANDARD) {
      std::cout << "Available Counters: " << avail_counters << std::endl;
    }
    if (avail_counters == 0) {
      // A batch size of zero would never advance the event loop below.
      IF_VERB(STANDARD) {
        std::cout << "\tNo counters available for this group. Skipping." <<
            std::endl;
      }
      continue;
    }

    // One handle slot per counter that can be live at the same time.
    std::vector<amdsmi_event_handle_t> evt_handles(avail_counters);

    for (uint32_t evnt = grp.first_evt(); evnt <= grp.last_evt();
         evnt += avail_counters) {
      IF_VERB(STANDARD) {
        std::cout << "\tTesting Event Type " << evnt << std::endl;
      }
      IF_VERB(STANDARD) {
        std::cout << "\tCreating events..." << std::endl;
      }

      // Create as many counters as fit, stopping at the end of the group.
      uint32_t j;
      for (j = 0; j < avail_counters; ++j) {
        const uint32_t evt_id = evnt + j;
        if (evt_id > grp.last_evt()) {
          break;
        }
        IF_VERB(STANDARD) {
          std::cout << "\tEvent Type " << evt_id << std::endl;
        }
        ret = amdsmi_dev_counter_create(dv_ind,
            static_cast<amdsmi_event_type_t>(evt_id), &evt_handles[j]);
        CHK_ERR_ASRT(ret)
      }
      const uint32_t num_created = j;

      IF_VERB(STANDARD) {
        std::cout << "\tStart Counters..." << std::endl;
      }
      uint32_t tmp_cntrs;
      for (j = 0; j < num_created; ++j) {
        ret = amdsmi_counter_control(evt_handles[j], AMDSMI_CNTR_CMD_START,
                                     nullptr);
        CHK_ERR_ASRT(ret)
        // Every started counter must reduce the available count by one.
        ret = amdsmi_counter_available_counters_get(dv_ind, grp.group(),
                                                    &tmp_cntrs);
        CHK_ERR_ASRT(ret)
        ASSERT_EQ(tmp_cntrs, (avail_counters - j - 1));
      }

      sleep(1);

      IF_VERB(STANDARD) {
        std::cout << "\tRead Counters..." << std::endl;
      }
      for (j = 0; j < num_created; ++j) {
        ret = amdsmi_counter_read(evt_handles[j], &val);
        CHK_ERR_ASRT(ret)
        IF_VERB(STANDARD) {
          std::cout << "\tCounter: " << (evnt + j) << std::endl;
          std::cout << "\tSuccessfully read value: " << std::endl;
          std::cout << "\t\tValue: " << val.value << std::endl;
          std::cout << "\t\tTime Enabled: " << val.time_enabled << std::endl;
          std::cout << "\t\tTime Running: " << val.time_running << std::endl;
        }
      }

      for (j = 0; j < num_created; ++j) {
        ret = amdsmi_dev_counter_destroy(evt_handles[j]);
        CHK_ERR_ASRT(ret)
      }
    }
  }
}
void TestPerfCntrReadWrite::Run(void) {
TestBase::Run();
if (setup_failed_) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
return;
}
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
amdsmi_device_handle dev_handle = device_handles_[dv_ind];
PrintDeviceHeader(dev_handle);
try {
testEventsIndividually(dev_handle);
testEventsSimultaneously(dev_handle);
} catch(amdsmi_status_t r) {
switch (r) {
case AMDSMI_STATUS_NOT_SUPPORTED:
std::cout << "The performance counter event tried is not "
"supported for this device" << std::endl;
break;
default:
std::cout << "Unexpected exception caught with amdsmi "
"return value of " << r << std::endl;
}
} catch(...) {
ASSERT_FALSE("Unexpected exception caught");
}
}
}
@@ -0,0 +1,101 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_PERF_CNTR_READ_WRITE_H_
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_PERF_CNTR_READ_WRITE_H_
#include <string>
#include "amd_smi_test/test_base.h"
// Test case class for the performance counter read/write test. The public
// methods are the TestBase life-cycle hooks; the private helpers do the
// actual counter work.
class TestPerfCntrReadWrite : public TestBase {
 public:
  // Constructs the test case.
  TestPerfCntrReadWrite();

  // Destroys the test case.
  virtual ~TestPerfCntrReadWrite();

  // Prepares the environment the test needs.
  virtual void SetUp();

  // Executes the body of the test.
  virtual void Run();

  // Cleans up and releases the resources used by the test.
  virtual void Close();

  // Displays the results of the test.
  virtual void DisplayResults() const;

  // Displays information about what this test does.
  virtual void DisplayTestInfo();

 private:
  // Counts event `evnt` on device `dv_ind` for `sleep_sec` seconds (1 by
  // default) and stores the reading in `*val`.
  void CountEvents(amdsmi_device_handle dv_ind,
                   amdsmi_event_type_t evnt, amdsmi_counter_value_t *val,
                   int32_t sleep_sec = 1);

  // Tests the events of each group one at a time on device `dv_ind`.
  void testEventsIndividually(amdsmi_device_handle dv_ind);

  // Tests several events of a group at the same time on device `dv_ind`.
  void testEventsSimultaneously(amdsmi_device_handle dv_ind);
};
// Value type describing one performance-counter event group: the group id,
// the first and last event ids belonging to it, the resulting event count
// and a printable name.
class PerfCntrEvtGrp {
 public:
  // `first` and `last` are the first and last event ids of group `grp`;
  // `name` is the label used when printing.
  explicit PerfCntrEvtGrp(amdsmi_event_group_t grp,
                          uint32_t first, uint32_t last, std::string name);
  ~PerfCntrEvtGrp();

  // Read-only accessors.
  amdsmi_event_group_t group() const { return grp_; }
  uint32_t first_evt() const { return first_evt_; }
  uint32_t last_evt() const { return last_evt_; }
  uint32_t num_events() const { return num_events_; }
  std::string name() const { return name_; }

 private:
  amdsmi_event_group_t grp_;  // group identifier
  uint32_t first_evt_;        // first event id in the group
  uint32_t last_evt_;         // last event id in the group
  uint32_t num_events_;       // number of events in the group
  std::string name_;          // printable group name
};
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_PERF_CNTR_READ_WRITE_H_
@@ -0,0 +1,230 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <stdint.h>
#include <stddef.h>
#include <cassert>
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "amd_smi.h"
#include "amd_smi_test/functional/process_info_read.h"
#include "amd_smi_test/test_common.h"
// Constructs the test case and registers the title and description that
// identify it.
TestProcInfoRead::TestProcInfoRead() : TestBase() {
set_title("AMDSMI Process Info Read Test");
set_description("This test verifies that process information such as the "
"process ID, PASID, etc. can be read properly.");
}
// Destructor; the body is empty, this class releases nothing itself here.
TestProcInfoRead::~TestProcInfoRead(void) {
}
// Prepares the test; this test needs nothing beyond TestBase::SetUp().
void TestProcInfoRead::SetUp(void) {
  TestBase::SetUp();
}
// Displays information about this test by delegating to
// TestBase::DisplayTestInfo().
void TestProcInfoRead::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
// Displays the test results; nothing is added to TestBase::DisplayResults().
void TestProcInfoRead::DisplayResults(void) const {
  TestBase::DisplayResults();
}
// Tears the test down by delegating to TestBase::Close().
void TestProcInfoRead::Close() {
// This will close handles opened within amdsmitst utility calls and call
// amdsmi_shut_down(), so it should be done after any other cleanup.
TestBase::Close();
}
// Prints the process id and PASID of `*p` on a single line of stdout.
// `p` must not be null.
static void dumpProcess(amdsmi_process_info_t *p) {
  assert(p != nullptr);
  std::cout << "\t** ProcessID: " << p->process_id << " "
            << "PASID: " << p->pasid << " "
            << std::endl;
}
void TestProcInfoRead::Run(void) {
amdsmi_status_t err;
uint32_t num_proc_found;
uint32_t val_ui32;
amdsmi_process_info_t *procs = nullptr;
TestBase::Run();
if (setup_failed_) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
return;
}
uint32_t num_devices = num_monitor_devs();
CHK_ERR_ASRT(err)
err = amdsmi_compute_process_info_get(nullptr, &num_proc_found);
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**Process info. read: Not supported on this machine"
<< std::endl;
return;
}
} else {
CHK_ERR_ASRT(err)
}
} else {
IF_VERB(STANDARD) {
std::cout << "\t** " << std::dec << num_proc_found <<
" GPU processes found" << std::endl;
}
}
if (num_proc_found == 0) {
return;
}
procs = new amdsmi_process_info_t[num_proc_found];
val_ui32 = num_proc_found;
err = amdsmi_compute_process_info_get(procs, &val_ui32);
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_INSUFFICIENT_SIZE) {
IF_VERB(STANDARD) {
std::cout << "\t** " << val_ui32 <<
" processes were read, but more became available that were unread."
<< std::endl;
for (uint32_t i = 0; i < val_ui32; ++i) {
dumpProcess(&procs[i]);
}
return;
}
} else {
CHK_ERR_ASRT(err)
}
} else {
IF_VERB(STANDARD) {
std::cout << "\t** Processes currently using GPU: " << std::endl;
for (uint32_t i = 0; i < val_ui32; ++i) {
dumpProcess(&procs[i]);
}
}
}
// Reset to the number we actually read
num_proc_found = val_ui32;
if (num_proc_found) {
// Allocate the max we expect to get
uint32_t *dev_inds = new uint32_t[num_devices];
uint32_t amt_allocd = num_devices;
for (uint32_t j = 0; j < num_proc_found; j++) {
err = amdsmi_compute_process_gpus_get(procs[j].process_id, dev_inds,
&amt_allocd);
if (err == AMDSMI_STATUS_NOT_FOUND) {
std::cout << "\t** Process " << procs[j].process_id <<
" is no longer present.";
continue;
} else {
CHK_ERR_ASRT(err);
ASSERT_LE(amt_allocd, num_devices);
}
std::cout << "\t** Process " << procs[j].process_id <<
" is using devices with indices: ";
uint32_t i;
if (amt_allocd > 0) {
for (i = 0; i < amt_allocd - 1; ++i) {
std::cout << dev_inds[i] << ", ";
}
std::cout << dev_inds[i];
}
std::cout << std::endl;
// Reset amt_allocd back to the amount acutally allocated
amt_allocd = num_devices;
}
delete []dev_inds;
amdsmi_process_info_t proc_info;
for (uint32_t j = 0; j < num_proc_found; j++) {
memset(&proc_info, 0x0, sizeof(amdsmi_process_info_t));
err = amdsmi_compute_process_info_by_pid_get(procs[j].process_id,
&proc_info);
if (err == AMDSMI_STATUS_NOT_FOUND) {
std::cout <<
"\t** WARNING: amdsmi_compute_process_info_get() found process " <<
procs[j].process_id << ", but subsequently, "
"amdsmi_compute_process_info_by_pid_get() did not"
" find this same process." << std::endl;
} else {
CHK_ERR_ASRT(err)
ASSERT_EQ(proc_info.process_id, procs[j].process_id);
ASSERT_EQ(proc_info.pasid, procs[j].pasid);
std::cout << "\t** Process ID: " <<
procs[j].process_id << " VRAM Usage: " <<
proc_info.vram_usage <<
" SDMA Usage: " <<
proc_info.sdma_usage <<
" Compute Unit Usage: " <<
proc_info.cu_occupancy <<
std::endl;
}
}
}
if (num_proc_found > 1) {
amdsmi_process_info_t tmp_proc;
val_ui32 = 1;
err = amdsmi_compute_process_info_get(&tmp_proc, &val_ui32);
if (err != AMDSMI_STATUS_INSUFFICIENT_SIZE) {
std::cout << "Expected amdsmi_compute_process_info_get() to tell us"
" there are more processes available, but instead go return code " <<
err << std::endl;
}
}
delete []procs;
}
@@ -0,0 +1,73 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_
#include "amd_smi_test/test_base.h"
// Test case class for the process information read test. The methods are the
// TestBase life-cycle hooks.
class TestProcInfoRead : public TestBase {
 public:
  // Constructs the test case.
  TestProcInfoRead();

  // Destroys the test case.
  virtual ~TestProcInfoRead();

  // Prepares the environment the test needs.
  virtual void SetUp();

  // Executes the body of the test.
  virtual void Run();

  // Cleans up and releases the resources used by the test.
  virtual void Close();

  // Displays the results of the test.
  virtual void DisplayResults() const;

  // Displays information about what this test does.
  virtual void DisplayTestInfo();
};
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_
@@ -0,0 +1,149 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <stdint.h>
#include <stddef.h>
#include <iostream>
#include "gtest/gtest.h"
#include "amd_smi.h"
#include "amd_smi_test/functional/xgmi_read_write.h"
#include "amd_smi_test/test_common.h"
// Constructs the test case and registers the title and description that
// identify it.
TestXGMIReadWrite::TestXGMIReadWrite() : TestBase() {
set_title("AMDSMI XGMI Read/Write Test");
set_description("This test verifies that XGMI error counts can be read"
" properly, and that the count can be reset.");
}
// Destructor; the body is empty, this class releases nothing itself here.
TestXGMIReadWrite::~TestXGMIReadWrite(void) {
}
// Prepares the test; this test needs nothing beyond TestBase::SetUp().
void TestXGMIReadWrite::SetUp(void) {
  TestBase::SetUp();
}
// Displays information about this test by delegating to
// TestBase::DisplayTestInfo().
void TestXGMIReadWrite::DisplayTestInfo(void) {
TestBase::DisplayTestInfo();
}
// Displays the test results; nothing is added to TestBase::DisplayResults().
void TestXGMIReadWrite::DisplayResults(void) const {
  TestBase::DisplayResults();
}
// Tears the test down by delegating to TestBase::Close().
void TestXGMIReadWrite::Close() {
// This will close handles opened within amdsmitst utility calls and call
// amdsmi_shut_down(), so it should be done after any other cleanup.
TestBase::Close();
}
// For every monitored device: reads the XGMI info (printing the hive id),
// reads the XGMI error status, checks that a null output pointer is rejected
// with the expected status, and finally resets the XGMI error status.
// Devices that report AMDSMI_STATUS_NOT_SUPPORTED for the info or the error
// status query are skipped.
void TestXGMIReadWrite::Run(void) {
  amdsmi_status_t err;
  amdsmi_xgmi_status_t err_stat;

  TestBase::Run();
  if (setup_failed_) {
    IF_VERB(STANDARD) {
      std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
    }
    return;
  }

  for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
    auto device = device_handles_[dv_ind];
    PrintDeviceHeader(device);

    amdsmi_xgmi_info_t info;
    err = amdsmi_get_xgmi_info(device, &info);
    if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
      std::cout <<
          "\t**amdsmi_get_xgmi_info() is not supported"
          " on this machine" << std::endl;
      continue;
    } else {
      CHK_ERR_ASRT(err)
      IF_VERB(STANDARD) {
        // std::hex is sticky; restore the stream flags afterwards so later
        // output is not silently printed in hexadecimal.
        const std::ios_base::fmtflags saved_flags = std::cout.flags();
        std::cout << "\t**XGMI Hive ID : " << std::hex <<
            info.xgmi_hive_id << std::endl;
        std::cout.flags(saved_flags);
      }
    }

    err = amdsmi_dev_xgmi_error_status(device, &err_stat);
    if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
      IF_VERB(STANDARD) {
        std::cout << "\t**XGMI Error Status: Not supported on this machine"
                  << std::endl;
      }
      // Verify api support checking functionality is working
      err = amdsmi_dev_xgmi_error_status(device, nullptr);
      ASSERT_EQ(err, AMDSMI_STATUS_NOT_SUPPORTED);
      continue;
    }
    CHK_ERR_ASRT(err)
    IF_VERB(STANDARD) {
      std::cout << "\t**XGMI Error Status: " <<
          static_cast<uint32_t>(err_stat) << std::endl;
    }
    // Verify api support checking functionality is working
    err = amdsmi_dev_xgmi_error_status(device, nullptr);
    ASSERT_EQ(err, AMDSMI_STATUS_INVAL);

    // TODO(cfree) We need to find a way to generate xgmi errors so this
    // test won't be meaningless
    err = amdsmi_dev_xgmi_error_reset(device);
    CHK_ERR_ASRT(err)
    IF_VERB(STANDARD) {
      std::cout << "\t**Successfully reset XGMI Error Status: " << std::endl;
    }
  }
}
@@ -0,0 +1,73 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_XGMI_READ_WRITE_H_
#define TESTS_AMD_SMI_TEST_FUNCTIONAL_XGMI_READ_WRITE_H_
#include "amd_smi_test/test_base.h"
// Functional test case for the XGMI read/write APIs, built on top of TestBase.
class TestXGMIReadWrite : public TestBase {
 public:
  // @Brief: Constructor for test case of TestXGMIReadWrite
  TestXGMIReadWrite();

  // @Brief: Destructor for test case of TestXGMIReadWrite
  virtual ~TestXGMIReadWrite();

  // @Brief: Set up the environment for the measurement
  virtual void SetUp();

  // @Brief: Execute the core measurement
  virtual void Run();

  // @Brief: Clean up and retrieve the resources
  virtual void Close();

  // @Brief: Display the results of the test
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo();
};
#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_XGMI_READ_WRITE_H_
@@ -5,7 +5,7 @@
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2018, Advanced Micro Devices, Inc.
* Copyright (c) 2022, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
@@ -56,6 +56,11 @@
#include "functional/fan_read.h"
#include "functional/fan_read_write.h"
#include "functional/evt_notif_read_write.h"
#include "functional/perf_cntr_read_write.h"
#include "amd_smi_test/functional/hw_topology_read.h"
#include "functional/xgmi_read_write.h"
#include "functional/api_support_read.h"
#include "functional/process_info_read.h"
/*
#include "functional/temp_read.h"
#include "functional/volt_read.h"
@@ -76,14 +81,9 @@
#include "functional/err_cnt_read.h"
#include "functional/mem_util_read.h"
#include "functional/id_info_read.h"
#include "functional/perf_cntr_read_write.h"
#include "functional/process_info_read.h"
#include "functional/xgmi_read_write.h"
#include "functional/mem_page_info_read.h"
#include "functional/api_support_read.h"
#include "functional/mutual_exclusion.h"
#include "functional/init_shutdown_refcount.h"
#include "amd_smi_test/functional/hw_topology_read.h"
#include "amd_smi_test/functional/gpu_metrics_read.h"
#include "amd_smi_test/functional/metrics_counter_read.h"
#include "amd_smi_test/functional/perf_determinism.h"
@@ -157,6 +157,26 @@ TEST(amdsmitstReadWrite, TestEvtNotifReadWrite) {
TestEvtNotifReadWrite tst;
RunGenericTest(&tst);
}
// Runs the TestPerfCntrReadWrite case through the generic test driver.
TEST(amdsmitstReadWrite, TestPerfCntrReadWrite) {
  TestPerfCntrReadWrite perf_cntr_test;
  RunGenericTest(&perf_cntr_test);
}
// Runs the TestXGMIReadWrite case through the generic test driver.
TEST(amdsmitstReadWrite, TestXGMIReadWrite) {
  TestXGMIReadWrite xgmi_test;
  RunGenericTest(&xgmi_test);
}
// Runs the TestAPISupportRead case through the generic test driver.
TEST(amdsmitstReadOnly, TestAPISupportRead) {
  TestAPISupportRead api_support_test;
  RunGenericTest(&api_support_test);
}
// Runs the TestHWTopologyRead case through the generic test driver.
TEST(amdsmitstReadOnly, TestHWTopologyRead) {
  TestHWTopologyRead hw_topology_test;
  RunGenericTest(&hw_topology_test);
}
// Runs the TestProcInfoRead case through the generic test driver.
TEST(amdsmitstReadOnly, TestProcInfoRead) {
  TestProcInfoRead proc_info_test;
  RunGenericTest(&proc_info_test);
}
/*
TEST(amdsmitstReadOnly, TempRead) {
TestTempRead tst;
@@ -230,18 +250,6 @@ TEST(amdsmitstReadOnly, TestIdInfoRead) {
TestIdInfoRead tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadWrite, TestPerfCntrReadWrite) {
TestPerfCntrReadWrite tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadOnly, TestProcInfoRead) {
TestProcInfoRead tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadOnly, TestHWTopologyRead) {
TestHWTopologyRead tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadOnly, TestGpuMetricsRead) {
TestGpuMetricsRead tst;
RunGenericTest(&tst);
@@ -254,18 +262,11 @@ TEST(amdsmitstReadWrite, TestPerfDeterminism) {
TestPerfDeterminism tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadWrite, TestXGMIReadWrite) {
TestXGMIReadWrite tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadOnly, TestMemPageInfoRead) {
TestMemPageInfoRead tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadOnly, TestAPISupportRead) {
TestAPISupportRead tst;
RunGenericTest(&tst);
}
TEST(amdsmitstReadOnly, TestMutualExclusion) {
TestMutualExclusion tst;
SetFlags(&tst);
@@ -274,6 +275,7 @@ TEST(amdsmitstReadOnly, TestMutualExclusion) {
tst.Run();
RunCustomTestEpilog(&tst);
}
TEST(amdsmitstReadOnly, TestConcurrentInit) {
TestConcurrentInit tst;
SetFlags(&tst);