diff --git a/projects/amdsmi/amd_smi/include/amd_smi.h b/projects/amdsmi/amd_smi/include/amd_smi.h index 956e4ad63d..ae8995d7dc 100644 --- a/projects/amdsmi/amd_smi/include/amd_smi.h +++ b/projects/amdsmi/amd_smi/include/amd_smi.h @@ -107,11 +107,26 @@ typedef enum device_type { typedef enum amdsmi_status { AMDSMI_STATUS_SUCCESS = 0, /**< Call succeeded */ AMDSMI_STATUS_INVAL, /**< Invalid parameters */ - AMDSMI_STATUS_OUT_OF_RESOURCES, /**< Not enough memory */ AMDSMI_STATUS_NOT_SUPPORTED, /**< Command not supported */ + AMDSMI_STATUS_FILE_ERROR, /**< Problem accessing a file. */ AMDSMI_STATUS_NO_PERM, /**< Permission Denied */ - AMDSMI_STATUS_BUSY, /**< Device busy */ + AMDSMI_STATUS_OUT_OF_RESOURCES, /**< Not enough memory */ + AMDSMI_STATUS_INTERNAL_EXCEPTION, /**< An internal exception was caught */ + AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS, /**< The provided input is out of allowable or safe range */ + AMDSMI_STATUS_INIT_ERROR, /**< An error occurred when initializing internal data structures */ + AMDSMI_STATUS_NOT_YET_IMPLEMENTED, /**< Not implemented yet */ AMDSMI_STATUS_NOT_FOUND, /**< Device Not found */ + AMDSMI_STATUS_INSUFFICIENT_SIZE, /**< Not enough resources were available for the operation */ + AMDSMI_STATUS_INTERRUPT, /**< An interrupt occurred during execution of function */ + AMDSMI_STATUS_UNEXPECTED_SIZE, /**< An unexpected amount of data was read */ + AMDSMI_STATUS_NO_DATA, /**< No data was found for a given input */ + AMDSMI_STATUS_UNEXPECTED_DATA, /**< The data read or provided to function is not what was expected */ + AMDSMI_STATUS_BUSY, /**< Device busy */ + AMDSMI_STATUS_REFCOUNT_OVERFLOW, /**< An internal reference counter exceeded INT32_MAX */ + AMDSMI_LIB_START = 1000, + AMDSMI_STATUS_FAIL_LOAD_MODULE = AMDSMI_LIB_START, //!< Fail to load lib + AMDSMI_STATUS_FAIL_LOAD_SYMBOL, + AMDSMI_STATUS_DRM_ERROR, //!< Error when call libdrm AMDSMI_STATUS_IO, /**< I/O Error */ AMDSMI_STATUS_FAULT, /**< Bad address */ AMDSMI_STATUS_API_FAILED, /**< API 
call failed */ @@ -119,21 +134,6 @@ typedef enum amdsmi_status { AMDSMI_STATUS_NO_SLOT, /**< No more free slot */ AMDSMI_STATUS_RETRY, /**< Retry operation */ AMDSMI_STATUS_NOT_INIT, /**< Device not initialized */ - AMDSMI_STATUS_INTERNAL_EXCEPTION, /**< An internal exception was caught */ - AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS, /**< The provided input is out of */ - AMDSMI_STATUS_NOT_YET_IMPLEMENTED, /**< The requested function has not - yet been implemented in the - current system for the current devices */ - AMDSMI_STATUS_INSUFFICIENT_SIZE, /**< Not enough resources were available for the operation */ - AMDSMI_STATUS_INTERRUPT, /**< An interrupt occurred during execution of function */ - AMDSMI_STATUS_UNEXPECTED_SIZE, /**< An unexpected amount of data was read */ - AMDSMI_STATUS_NO_DATA, /**< No data was found for a given input */ - AMDSMI_STATUS_UNEXPECTED_DATA, /**< The data read or provided to function is not what was expected */ - AMDSMI_STATUS_REFCOUNT_OVERFLOW, /**< An internal reference counter exceeded INT32_MAX */ - AMDSMI_LIB_START = 1000, - AMDSMI_STATUS_FAIL_LOAD_MODULE = AMDSMI_LIB_START, //!< Fail to load lib - AMDSMI_STATUS_FAIL_LOAD_SYMBOL, - AMDSMI_STATUS_DRM_ERROR, //!< Error when call libdrm AMDSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred } amdsmi_status_t; diff --git a/projects/amdsmi/amd_smi/src/amd_smi.cc b/projects/amdsmi/amd_smi/src/amd_smi.cc index 9bd70e973b..1c8d7067c5 100644 --- a/projects/amdsmi/amd_smi/src/amd_smi.cc +++ b/projects/amdsmi/amd_smi/src/amd_smi.cc @@ -64,14 +64,13 @@ // TODO(bliu): One to one map to all status code static amdsmi_status_t rsmi_to_amdsmi_status(rsmi_status_t status) { - if (status == RSMI_STATUS_NO_DATA) return AMDSMI_STATUS_NO_DATA; return static_cast(status); } -template -amdsmi_status_t rsmi_wrapper(F && f, - amdsmi_device_handle device_handle, Args &&... 
args) { - if (device_handle == nullptr) return AMDSMI_STATUS_INVAL; +static amdsmi_status_t get_gpu_device_from_handle(amdsmi_device_handle device_handle, + amd::smi::AMDSmiGPUDevice** gpudevice) { + if (device_handle == nullptr || gpudevice == nullptr) + return AMDSMI_STATUS_INVAL; amd::smi::AMDSmiDevice* device = nullptr; amdsmi_status_t r = amd::smi::AMDSmiSystem::getInstance() @@ -79,17 +78,27 @@ amdsmi_status_t rsmi_wrapper(F && f, if (r != AMDSMI_STATUS_SUCCESS) return r; if (device->get_device_type() == AMD_GPU) { - amd::smi::AMDSmiGPUDevice* gpu_device = - static_cast(device_handle); - uint32_t gpu_index = gpu_device->get_gpu_id(); - auto r = std::forward(f)(gpu_index, - std::forward(args)...); - return rsmi_to_amdsmi_status(r); + *gpudevice = static_cast(device_handle); + return AMDSMI_STATUS_SUCCESS; } return AMDSMI_STATUS_NOT_SUPPORTED; } +template +amdsmi_status_t rsmi_wrapper(F && f, + amdsmi_device_handle device_handle, Args &&... args) { + amd::smi::AMDSmiGPUDevice* gpu_device = nullptr; + amdsmi_status_t r = get_gpu_device_from_handle(device_handle, &gpu_device); + if (r != AMDSMI_STATUS_SUCCESS) return r; + + + uint32_t gpu_index = gpu_device->get_gpu_id(); + auto rstatus = std::forward(f)(gpu_index, + std::forward(args)...); + return rsmi_to_amdsmi_status(rstatus); +} + amdsmi_status_t amdsmi_init(uint64_t flags) { return amd::smi::AMDSmiSystem::getInstance().init(flags); @@ -407,8 +416,9 @@ amdsmi_event_notification_get(int timeout_ms, std::vector r_data(*num_elem); rsmi_status_t r = rsmi_event_notification_get( timeout_ms, num_elem, &r_data[0]); - if (r != RSMI_STATUS_SUCCESS) + if (r != RSMI_STATUS_SUCCESS) { return rsmi_to_amdsmi_status(r); + } // convert output for (uint32_t i=0; i < *num_elem; i++) { @@ -419,8 +429,7 @@ amdsmi_event_notification_get(int timeout_ms, MAX_EVENT_NOTIFICATION_MSG_SIZE); amdsmi_status_t r = amd::smi::AMDSmiSystem::getInstance() .gpu_index_to_handle(rsmi_data.dv_ind, &(data[i].device_handle)); - if (r != 
AMDSMI_STATUS_SUCCESS) - return r; + if (r != AMDSMI_STATUS_SUCCESS) return r; } return AMDSMI_STATUS_SUCCESS; @@ -430,3 +439,212 @@ amdsmi_status_t amdsmi_event_notification_stop( amdsmi_device_handle device_handle) { return rsmi_wrapper(rsmi_event_notification_stop, device_handle); } + +amdsmi_status_t amdsmi_dev_counter_group_supported( + amdsmi_device_handle device_handle, amdsmi_event_group_t group) { + return rsmi_wrapper(rsmi_dev_counter_group_supported, device_handle, + static_cast(group)); +} + +amdsmi_status_t amdsmi_dev_counter_create(amdsmi_device_handle device_handle, + amdsmi_event_type_t type, amdsmi_event_handle_t *evnt_handle) { + return rsmi_wrapper(rsmi_dev_counter_create, device_handle, + static_cast(type), + static_cast(evnt_handle)); +} + +amdsmi_status_t amdsmi_dev_counter_destroy(amdsmi_event_handle_t evnt_handle) { + rsmi_status_t r = rsmi_dev_counter_destroy( + static_cast(evnt_handle)); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t amdsmi_counter_control(amdsmi_event_handle_t evt_handle, + amdsmi_counter_command_t cmd, void *cmd_args) { + rsmi_status_t r = rsmi_counter_control( + static_cast(evt_handle), + static_cast(cmd), cmd_args); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t +amdsmi_counter_read(amdsmi_event_handle_t evt_handle, + amdsmi_counter_value_t *value) { + rsmi_status_t r = rsmi_counter_read( + static_cast(evt_handle), + reinterpret_cast(value)); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t +amdsmi_counter_available_counters_get(amdsmi_device_handle device_handle, + amdsmi_event_group_t grp, uint32_t *available) { + return rsmi_wrapper(rsmi_counter_available_counters_get, device_handle, + static_cast(grp), + available); +} + +amdsmi_status_t +amdsmi_topo_get_numa_node_number(amdsmi_device_handle device_handle, uint32_t *numa_node) { + return rsmi_wrapper(rsmi_topo_get_numa_node_number, device_handle, numa_node); +} + +amdsmi_status_t +amdsmi_topo_get_link_weight(amdsmi_device_handle 
device_handle_src, amdsmi_device_handle device_handle_dst, + uint64_t *weight) { + amd::smi::AMDSmiGPUDevice* src_device = nullptr; + amd::smi::AMDSmiGPUDevice* dst_device = nullptr; + amdsmi_status_t r = get_gpu_device_from_handle(device_handle_src, &src_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + r = get_gpu_device_from_handle(device_handle_dst, &dst_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + auto rstatus = rsmi_topo_get_link_weight(src_device->get_gpu_id(), dst_device->get_gpu_id(), + weight); + return rsmi_to_amdsmi_status(rstatus); +} + +amdsmi_status_t +amdsmi_minmax_bandwidth_get(amdsmi_device_handle device_handle_src, amdsmi_device_handle device_handle_dst, + uint64_t *min_bandwidth, uint64_t *max_bandwidth) { + amd::smi::AMDSmiGPUDevice* src_device = nullptr; + amd::smi::AMDSmiGPUDevice* dst_device = nullptr; + amdsmi_status_t r = get_gpu_device_from_handle(device_handle_src, &src_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + r = get_gpu_device_from_handle(device_handle_dst, &dst_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + auto rstatus = rsmi_minmax_bandwidth_get(src_device->get_gpu_id(), dst_device->get_gpu_id(), + min_bandwidth, max_bandwidth); + return rsmi_to_amdsmi_status(rstatus); +} + +amdsmi_status_t +amdsmi_topo_get_link_type(amdsmi_device_handle device_handle_src, amdsmi_device_handle device_handle_dst, + uint64_t *hops, AMDSMI_IO_LINK_TYPE *type) { + amd::smi::AMDSmiGPUDevice* src_device = nullptr; + amd::smi::AMDSmiGPUDevice* dst_device = nullptr; + amdsmi_status_t r = get_gpu_device_from_handle(device_handle_src, &src_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + r = get_gpu_device_from_handle(device_handle_dst, &dst_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + auto rstatus = rsmi_topo_get_link_type(src_device->get_gpu_id(), dst_device->get_gpu_id(), + hops, reinterpret_cast(type)); + return rsmi_to_amdsmi_status(rstatus); +} + +amdsmi_status_t 
+amdsmi_is_P2P_accessible(amdsmi_device_handle device_handle_src, amdsmi_device_handle device_handle_dst, + bool *accessible) { + amd::smi::AMDSmiGPUDevice* src_device = nullptr; + amd::smi::AMDSmiGPUDevice* dst_device = nullptr; + amdsmi_status_t r = get_gpu_device_from_handle(device_handle_src, &src_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + r = get_gpu_device_from_handle(device_handle_dst, &dst_device); + if (r != AMDSMI_STATUS_SUCCESS) + return r; + auto rstatus = rsmi_is_P2P_accessible(src_device->get_gpu_id(), dst_device->get_gpu_id(), + accessible); + return rsmi_to_amdsmi_status(rstatus); +} + +// TODO(bliu) : other xgmi related information +amdsmi_status +amdsmi_get_xgmi_info(amdsmi_device_handle device_handle, amdsmi_xgmi_info_t *info) { + if (info == nullptr) + return AMDSMI_STATUS_INVAL; + return rsmi_wrapper(rsmi_dev_xgmi_hive_id_get, device_handle, + &(info->xgmi_hive_id)); +} + +amdsmi_status_t +amdsmi_dev_xgmi_error_status(amdsmi_device_handle device_handle, amdsmi_xgmi_status_t *status) { + return rsmi_wrapper(rsmi_dev_xgmi_error_status, device_handle, + reinterpret_cast(status)); +} + +amdsmi_status_t +amdsmi_dev_xgmi_error_reset(amdsmi_device_handle device_handle) { + return rsmi_wrapper(rsmi_dev_xgmi_error_reset, device_handle); +} + +amdsmi_status_t +amdsmi_dev_supported_func_iterator_open(amdsmi_device_handle device_handle, + amdsmi_func_id_iter_handle_t *handle) { + if (handle == nullptr) + return AMDSMI_STATUS_INVAL; + return rsmi_wrapper(rsmi_dev_supported_func_iterator_open, device_handle, + reinterpret_cast(handle)); +} + +amdsmi_status_t +amdsmi_dev_supported_variant_iterator_open(amdsmi_func_id_iter_handle_t obj_h, + amdsmi_func_id_iter_handle_t *var_iter) { + if (var_iter == nullptr) + return AMDSMI_STATUS_INVAL; + auto r = rsmi_dev_supported_variant_iterator_open( + reinterpret_cast(obj_h), + reinterpret_cast(var_iter)); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t 
+amdsmi_func_iter_next(amdsmi_func_id_iter_handle_t handle) { + auto r = rsmi_func_iter_next( + reinterpret_cast(handle)); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t +amdsmi_dev_supported_func_iterator_close(amdsmi_func_id_iter_handle_t *handle) { + if (handle == nullptr) + return AMDSMI_STATUS_INVAL; + auto r = rsmi_dev_supported_func_iterator_close( + reinterpret_cast(handle)); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t +amdsmi_func_iter_value_get(amdsmi_func_id_iter_handle_t handle, + amdsmi_func_id_value_t *value) { + if (value == nullptr) + return AMDSMI_STATUS_INVAL; + auto r = rsmi_func_iter_value_get( + reinterpret_cast(handle), + reinterpret_cast(value)); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t +amdsmi_compute_process_info_get(amdsmi_process_info_t *procs, uint32_t *num_items) { + if (num_items == nullptr) + return AMDSMI_STATUS_INVAL; + auto r = rsmi_compute_process_info_get( + reinterpret_cast(procs), + num_items); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t +amdsmi_compute_process_info_by_pid_get(uint32_t pid, amdsmi_process_info_t *proc) { + if (proc == nullptr) + return AMDSMI_STATUS_INVAL; + auto r = rsmi_compute_process_info_by_pid_get(pid, + reinterpret_cast(proc)); + return rsmi_to_amdsmi_status(r); +} + +amdsmi_status_t +amdsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices, + uint32_t *num_devices) { + if (dv_indices == nullptr || num_devices == nullptr) + return AMDSMI_STATUS_INVAL; + auto r = rsmi_compute_process_gpus_get(pid, dv_indices, num_devices); + return rsmi_to_amdsmi_status(r); +} + diff --git a/projects/amdsmi/tests/amd_smi_test/amdsmitst.exclude b/projects/amdsmi/tests/amd_smi_test/amdsmitst.exclude new file mode 100644 index 0000000000..f498de400f --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/amdsmitst.exclude @@ -0,0 +1,66 @@ +declare -A FILTER + +# FILTER is meant to be used with a negative gtest filter + +# Designate the tests to be excluded from all 
test runs first, +# followed by tests that should be excluded by device. + +# Permanent exclusions +# These tests are included for debugging, but are not executed in normal +# execution on any ASIC: +PERMANENT_BLACKLIST_ALL_ASICS= + +# This is the temporary blacklist for all ASICs. This is to be used when a test +# is failing consistently +TEMPORARY_BLACKLIST_ALL_ASICS= + +if [ -z $PERMANENT_BLACKLIST_ALL_ASICS -a -z $TEMPORARY_BLACKLIST_ALL_ASICS ]; then + BLACKLIST_ALL_ASICS= +else + BLACKLIST_ALL_ASICS=\ + "$PERMANENT_BLACKLIST_ALL_ASICS:"\ + "$TEMPORARY_BLACKLIST_ALL_ASICS" +fi + +# Device specific blacklists +FILTER[vega10]=\ +$BLACKLIST_ALL_ASICS + +# SWDEV-207510 +FILTER[vega20]=\ +$BLACKLIST_ALL_ASICS\ +"amdsmitstReadOnly.TestFrequenciesRead:"\ +"amdsmitstReadOnly.TestProcInfoRead" + +# SWDEV-207510 +FILTER[arcturus]=\ +$BLACKLIST_ALL_ASICS\ +"amdsmitstReadOnly.TestFrequenciesRead:"\ +"amdsmitstReadWrite.TestFrequenciesReadWrite:"\ +"amdsmitstReadOnly.TestProcInfoRead" + +# SWDEV-306889 +FILTER[aldebaran]=\ +$BLACKLIST_ALL_ASICS\ +"amdsmitstReadOnly.FanRead:"\ +"amdsmitstReadOnly.TestVoltCurvRead:"\ +"amdsmitstReadOnly.TestFrequenciesRead:"\ +"amdsmitstReadWrite.FanReadWrite:"\ +"amdsmitstReadWrite.TestFrequenciesReadWrite:"\ +"amdsmitstReadWrite.TestPciReadWrite:"\ +"amdsmitstReadWrite.TestPowerReadWrite" + +# SWDEV-319795 +FILTER[sienna_cichlid]=\ +$BLACKLIST_ALL_ASICS\ +"amdsmitstReadWrite.TestPerfLevelReadWrite" + +# SWDEV-321166 +FILTER[virtualization]=\ +$BLACKLIST_ALL_ASICS\ +"amdsmitstReadOnly.TestOverdriveRead:"\ +"amdsmitstReadOnly.TestGPUBusyRead:"\ +"amdsmitstReadWrite.FanReadWrite:"\ +"amdsmitstReadWrite.TestOverdriveReadWrite:"\ +"amdsmitstReadWrite.TestPowerReadWrite:"\ +"amdsmitstReadWrite.TestPowerCapReadWrite" \ No newline at end of file diff --git a/projects/amdsmi/tests/amd_smi_test/functional/api_support_read.cc b/projects/amdsmi/tests/amd_smi_test/functional/api_support_read.cc new file mode 100755 index 0000000000..7cc8e52436 --- 
/dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/api_support_read.cc @@ -0,0 +1,188 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +#include +#include + +#include "gtest/gtest.h" +#include "amd_smi.h" +#include "amd_smi_test/functional/api_support_read.h" +#include "amd_smi_test/test_common.h" +#include "amd_smi_test/test_utils.h" + +TestAPISupportRead::TestAPISupportRead() : TestBase() { + set_title("AMDSMI API Support Read Test"); + set_description("This test verifies that the supported APIs are corretly " + "identified."); +} + +TestAPISupportRead::~TestAPISupportRead(void) { +} + +void TestAPISupportRead::SetUp(void) { + TestBase::SetUp(); + + return; +} + +void TestAPISupportRead::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestAPISupportRead::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestAPISupportRead::Close() { + // This will close handles opened within amdsmitst utility calls and call + // amdsmi_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +void TestAPISupportRead::Run(void) { + amdsmi_status_t err; + + TestBase::Run(); + if (setup_failed_) { + IF_VERB(STANDARD) { + std::cout << "** SetUp Failed for this test. 
Skipping.**" << std::endl; + } + return; + } + + amdsmi_func_id_iter_handle_t iter_handle, var_iter, sub_var_iter; + amdsmi_func_id_value_t value; + + for (uint32_t x = 0; x < num_iterations(); ++x) { + for (uint32_t i = 0; i < num_monitor_devs(); ++i) { + IF_VERB(STANDARD) { + PrintDeviceHeader(device_handles_[i]); + std::cout << "Supported AMDSMI Functions:" << std::endl; + std::cout << "\tVariants (Monitors)" << std::endl; + } + err = amdsmi_dev_supported_func_iterator_open(device_handles_[i], &iter_handle); + CHK_ERR_ASRT(err) + + while (1) { + err = amdsmi_func_iter_value_get(iter_handle, &value); + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << "Function Name: " << value.name << std::endl; + } + err = amdsmi_dev_supported_variant_iterator_open(iter_handle, &var_iter); + if (err != AMDSMI_STATUS_NO_DATA) { + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << "\tVariants/Monitors: "; + } + while (1) { + err = amdsmi_func_iter_value_get(var_iter, &value); + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + if (value.id == AMDSMI_DEFAULT_VARIANT) { + std::cout << "Default Variant "; + } else { + std::cout << value.id; + } + std::cout << " ("; + } + err = + amdsmi_dev_supported_variant_iterator_open(var_iter, &sub_var_iter); + if (err != AMDSMI_STATUS_NO_DATA) { + CHK_ERR_ASRT(err) + + while (1) { + err = amdsmi_func_iter_value_get(sub_var_iter, &value); + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << value.id << ", "; + } + err = amdsmi_func_iter_next(sub_var_iter); + + if (err == AMDSMI_STATUS_NO_DATA) { + break; + } + CHK_ERR_ASRT(err) + } + err = amdsmi_dev_supported_func_iterator_close(&sub_var_iter); + CHK_ERR_ASRT(err) + } + + IF_VERB(STANDARD) { + std::cout << "), "; + } + err = amdsmi_func_iter_next(var_iter); + + if (err == AMDSMI_STATUS_NO_DATA) { + break; + } + CHK_ERR_ASRT(err) + } + IF_VERB(STANDARD) { + std::cout << std::endl; + } + err = amdsmi_dev_supported_func_iterator_close(&var_iter); + CHK_ERR_ASRT(err) + } + + err = 
amdsmi_func_iter_next(iter_handle); + + if (err == AMDSMI_STATUS_NO_DATA) { + break; + } + CHK_ERR_ASRT(err) + + // err = amdsmi_dev_supported_variant_iterator_open(iter_handle, &var_iter); + // + } + err = amdsmi_dev_supported_func_iterator_close(&iter_handle); + CHK_ERR_ASRT(err) + } + } +} diff --git a/projects/amdsmi/tests/amd_smi_test/functional/api_support_read.h b/projects/amdsmi/tests/amd_smi_test/functional/api_support_read.h new file mode 100755 index 0000000000..dc68df3995 --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/api_support_read.h @@ -0,0 +1,73 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. 
+ * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ +#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_API_SUPPORT_READ_H_ +#define TESTS_AMD_SMI_TEST_FUNCTIONAL_API_SUPPORT_READ_H_ + +#include "amd_smi_test/test_base.h" + +class TestAPISupportRead : public TestBase { + public: + TestAPISupportRead(); + + // @Brief: Destructor for test case of TestAPISupportRead + virtual ~TestAPISupportRead(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrieve the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_API_SUPPORT_READ_H_ diff --git a/projects/amdsmi/tests/amd_smi_test/functional/evt_notif_read_write.cc b/projects/amdsmi/tests/amd_smi_test/functional/evt_notif_read_write.cc index ae086cfe4f..e948f9cf80 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/evt_notif_read_write.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/evt_notif_read_write.cc @@ -94,7 +94,8 @@ void TestEvtNotifReadWrite::Run(void) { if (setup_failed_) { IF_VERB(STANDARD) { - std::cout << "** SetUp Failed for this test. 
Skipping.**" << std::endl; + std::cout << "** SetUp Failed for this test. Skipping.**" << + std::endl; } return; } @@ -112,7 +113,8 @@ void TestEvtNotifReadWrite::Run(void) { if (ret == AMDSMI_STATUS_NOT_SUPPORTED) { IF_VERB(STANDARD) { std::cout << - "Event notification is not supported for this driver version." << std::endl; + "Event notification is not supported for this driver version." << + std::endl; } return; } @@ -121,8 +123,8 @@ void TestEvtNotifReadWrite::Run(void) { ASSERT_EQ(ret, AMDSMI_STATUS_SUCCESS); } + amdsmi_evt_notification_data_t data[10]; uint32_t num_elem = 10; - amdsmi_evt_notification_data_t data[num_elem]; bool read_again = false; ret = amdsmi_event_notification_get(10000, &num_elem, data); diff --git a/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.cc b/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.cc new file mode 100755 index 0000000000..d87a7edaae --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.cc @@ -0,0 +1,306 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include + +#include +#include +#include + +#include "gtest/gtest.h" +#include "amd_smi.h" +#include "amd_smi_test/functional/hw_topology_read.h" +#include "amd_smi_test/test_common.h" + +typedef struct { + std::string type; + uint64_t hops; + uint64_t weight; + bool accessible; +} gpu_link_t; + +TestHWTopologyRead::TestHWTopologyRead() : TestBase() { + set_title("AMDSMI Hardware Topology Read Test"); + set_description( + "This test verifies that Hardware Topology can be read properly."); +} + +TestHWTopologyRead::~TestHWTopologyRead(void) { +} + +void TestHWTopologyRead::SetUp(void) { + TestBase::SetUp(); + + return; +} + +void TestHWTopologyRead::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestHWTopologyRead::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestHWTopologyRead::Close() { + // This will close handles opened within amdsmitst utility calls and call + // amdsmi_shut_down(), so it should be done after other cleanup + TestBase::Close(); +} + +void TestHWTopologyRead::Run(void) { + amdsmi_status_t err; + uint32_t i, j; + + TestBase::Run(); + if (setup_failed_) { + IF_VERB(STANDARD) { + std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl; + } + return; + } + + uint32_t num_devices = num_monitor_devs(); + + // gpu_link_t gpu_links[num_devices][num_devices]; + std::vector> gpu_links(num_devices, + std::vector(num_devices)); + // uint32_t numa_numbers[num_devices]; + std::vector numa_numbers(num_devices); + + for (uint32_t dv_ind = 0; dv_ind < num_devices; ++dv_ind) { + amdsmi_device_handle dev_handle = device_handles_[dv_ind]; + err = amdsmi_topo_get_numa_node_number(dev_handle, &numa_numbers[dv_ind]); + if (err != AMDSMI_STATUS_SUCCESS) { + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << + "\t**Numa Node Number. 
read: Not supported on this machine" << + std::endl; + return; + } + } else { + CHK_ERR_ASRT(err) + } + } + } + + for (uint32_t dv_ind_src = 0; dv_ind_src < num_devices; dv_ind_src++) { + for (uint32_t dv_ind_dst = 0; dv_ind_dst < num_devices; dv_ind_dst++) { + if (dv_ind_src == dv_ind_dst) { + gpu_links[dv_ind_src][dv_ind_dst].type = "X"; + gpu_links[dv_ind_src][dv_ind_dst].hops = 0; + gpu_links[dv_ind_src][dv_ind_dst].weight = 0; + gpu_links[dv_ind_src][dv_ind_dst].accessible = true; + } else { + AMDSMI_IO_LINK_TYPE type; + err = amdsmi_topo_get_link_type(device_handles_[dv_ind_src], + device_handles_[dv_ind_dst], + &gpu_links[dv_ind_src][dv_ind_dst].hops, &type); + if (err != AMDSMI_STATUS_SUCCESS) { + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << + "\t**Link Type. read: Not supported on this machine" + << std::endl; + return; + } + } else { + CHK_ERR_ASRT(err) + } + } else { + switch (type) { + case AMDSMI_IOLINK_TYPE_PCIEXPRESS: + gpu_links[dv_ind_src][dv_ind_dst].type = "PCIE"; + break; + + case AMDSMI_IOLINK_TYPE_XGMI: + gpu_links[dv_ind_src][dv_ind_dst].type = "XGMI"; + break; + + default: + gpu_links[dv_ind_src][dv_ind_dst].type = "XXXX"; + IF_VERB(STANDARD) { + std::cout << "\t**Invalid IO LINK type. type=" << type << + std::endl; + } + } + } + err = amdsmi_topo_get_link_weight(device_handles_[dv_ind_src], + device_handles_[dv_ind_dst], + &gpu_links[dv_ind_src][dv_ind_dst].weight); + if (err != AMDSMI_STATUS_SUCCESS) { + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << + "\t**Link Weight. read: Not supported on this machine" + << std::endl; + return; + } + } else { + CHK_ERR_ASRT(err) + } + } + err = amdsmi_is_P2P_accessible(device_handles_[dv_ind_src], + device_handles_[dv_ind_dst], + &gpu_links[dv_ind_src][dv_ind_dst].accessible); + if (err != AMDSMI_STATUS_SUCCESS) { + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << + "\t**P2P Access. 
check: Not supported on this machine" + << std::endl; + return; + } + } else { + CHK_ERR_ASRT(err) + } + } + } + } + } + + IF_NVERB(STANDARD) { + return; + } + + std::cout << "**NUMA node number of GPUs**" << std::endl; + std::cout << std::setw(12) << std::left <<"GPU#"; + std::cout <<"NUMA node number"; + std::cout << std::endl; + for (i = 0; i < num_devices; ++i) { + std::cout << std::setw(12) << std::left << i; + std::cout << numa_numbers[i]; + std::cout << std::endl; + } + std::cout << std::endl; + std::cout << std::endl; + + std::string tmp; + std::cout << "**Type between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + + for (i = 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + if (i == j) { + std::cout << std::setw(12) << std::left << "X"; + } else { + std::cout << std::setw(12) << std::left << gpu_links[i][j].type; + } + } + std::cout << std::endl; + } + std::cout << std::endl; + + std::cout << "**Hops between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + + for (i = 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + if (i == j) { + std::cout << std::setw(12) << std::left << "X"; + } else { + std::cout << std::setw(12) << std::left << gpu_links[i][j].hops; + } + } + std::cout << std::endl; + } + std::cout << std::endl; + + std::cout << "**Weight between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + + for (i 
= 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + if (i == j) { + std::cout << std::setw(12) << std::left << "X"; + } else { + std::cout << std::setw(12) << std::left << gpu_links[i][j].weight; + } + } + std::cout << std::endl; + } + std::cout << std::endl; + std::cout << "**Access between two GPUs**" << std::endl; + std::cout << " "; + for (i = 0; i < num_devices; ++i) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(12) << std::left << tmp; + } + std::cout << std::endl; + for (i = 0; i < num_devices; i++) { + tmp = "GPU" + std::to_string(i); + std::cout << std::setw(6) << std::left << tmp; + for (j = 0; j < num_devices; j++) { + std::cout << std::boolalpha; + std::cout << std::setw(12) << std::left << gpu_links[i][j].accessible; + } + std::cout << std::endl; + } + std::cout << std::endl; +} diff --git a/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.h b/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.h new file mode 100755 index 0000000000..d629e56aa9 --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/hw_topology_read.h @@ -0,0 +1,73 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ +#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_HW_TOPOLOGY_READ_H_ +#define TESTS_AMD_SMI_TEST_FUNCTIONAL_HW_TOPOLOGY_READ_H_ + +#include "amd_smi_test/test_base.h" + +class TestHWTopologyRead : public TestBase { + public: + TestHWTopologyRead(); + + // @Brief: Destructor for test case of TestHWTopologyRead + virtual ~TestHWTopologyRead(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrieve the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_HW_TOPOLOGY_READ_H_ diff --git a/projects/amdsmi/tests/amd_smi_test/functional/perf_cntr_read_write.cc b/projects/amdsmi/tests/amd_smi_test/functional/perf_cntr_read_write.cc new file mode 100755 index 0000000000..52946a1e0a --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/perf_cntr_read_write.cc @@ -0,0 +1,375 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "amd_smi.h" +#include "amd_smi_test/functional/perf_cntr_read_write.h" +#include "amd_smi_test/test_common.h" + +PerfCntrEvtGrp::PerfCntrEvtGrp(amdsmi_event_group_t grp, uint32_t first, + uint32_t last, std::string name) : grp_(grp), first_evt_(first), + last_evt_(last), name_(name) { + num_events_ = last_evt_ - first_evt_ + 1; +} + +PerfCntrEvtGrp::~PerfCntrEvtGrp() {} + +// Add new event groups to test here +#define PC_EVT_GRP(SHRT, NAME) \ + PerfCntrEvtGrp(AMDSMI_EVNT_GRP_##SHRT, AMDSMI_EVNT_##SHRT##_FIRST, \ + AMDSMI_EVNT_##SHRT##_LAST, NAME) +static const std::vector s_event_groups = { + PC_EVT_GRP(XGMI, "XGMI"), + PC_EVT_GRP(XGMI_DATA_OUT, "XGMI_DATA_OUT") +}; + +TestPerfCntrReadWrite::TestPerfCntrReadWrite() : TestBase() { + set_title("AMDSMI Performance Counter Read/Write Test"); + set_description("The Performance counter tests verify that performance" + " counters can be controlled and read properly."); +} + +TestPerfCntrReadWrite::~TestPerfCntrReadWrite(void) { +} + +void TestPerfCntrReadWrite::SetUp(void) { + TestBase::SetUp(); + + return; +} + +void TestPerfCntrReadWrite::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestPerfCntrReadWrite::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestPerfCntrReadWrite::Close() { + // This will close handles opened within amdsmitst utility calls and call + // amdsmi_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +#define AMDSMI_EVNT_ENUM_FIRST(GRP_NAME) AMDSMI_EVNT_##GRP_NAME##_FIRST +#define AMDSMI_EVNT_ENUM_LAST(GRP_NAME) AMDSMI_EVNT_##GRP_NAME##_LAST + +// Refactor this to handle different event groups once we have > 1 event group + +void TestPerfCntrReadWrite::CountEvents(amdsmi_device_handle dv_ind, + amdsmi_event_type_t evnt, amdsmi_counter_value_t *val, int32_t sleep_sec) { + 
amdsmi_event_handle_t evt_handle; + amdsmi_status_t ret; + + ret = amdsmi_dev_counter_create(dv_ind, + static_cast(evnt), &evt_handle); + CHK_ERR_ASRT(ret) + + // Note that amdsmi_dev_counter_create() should never return + // AMDSMI_STATUS_NOT_SUPPORTED. It will return AMDSMI_STATUS_OUT_OF_RESOURCES + // if it is unable to create a counter. + ret = amdsmi_dev_counter_create(dv_ind, + static_cast(evnt), nullptr); + ASSERT_EQ(ret, AMDSMI_STATUS_INVAL); + + ret = amdsmi_counter_control(evt_handle, AMDSMI_CNTR_CMD_START, nullptr); + if (ret == AMDSMI_STATUS_NOT_SUPPORTED) { + std::cout << "amdsmi_counter_control() returned " + "AMDSMI_STATUS_NOT_SUPPORTED" << std::endl; + throw AMDSMI_STATUS_NOT_SUPPORTED; + } else { + CHK_ERR_ASRT(ret) + } + sleep(sleep_sec); + + ret = amdsmi_counter_read(evt_handle, val); + CHK_ERR_ASRT(ret) + + IF_VERB(STANDARD) { + std::cout << "\t\t\tValue: " << val->value << std::endl; + std::cout << "\t\t\tTime Enabled (nS): " << val->time_enabled << std::endl; + std::cout << "\t\t\tTime Running (nS): " << val->time_running << std::endl; + std::cout << "\t\t\tEvents/Second Running: " << + val->value/static_cast(val->time_running) << std::endl; + } + ret = amdsmi_dev_counter_destroy(evt_handle); + CHK_ERR_ASRT(ret) +} + +static const uint64_t kGigByte = 1073741824; // 1024^3 +static const uint64_t kGig = 1000000000; + +static const uint64_t kVg20Level1Bandwidth = 23; // 23 GB/sec + + +void +TestPerfCntrReadWrite::testEventsIndividually(amdsmi_device_handle dv_ind) { + amdsmi_status_t ret; + amdsmi_counter_value_t val; + uint64_t throughput; + + std::cout << "Test events sequentially (device " << + dv_ind << ")" << std::endl; + + auto utiliz = [&](amdsmi_event_type_t evt, uint32_t chan) { + IF_VERB(STANDARD) { + std::cout << "****************************" << std::endl; + std::cout << "Test XGMI Link Utilization (channel " << + chan << ")" << std::endl; + std::cout << "****************************" << std::endl; + std::cout << "Assumed Level 1 
Bandwidth: " << + kVg20Level1Bandwidth << "GB/sec" << std::endl; + } + uint32_t tmp_verbosity = verbosity(); + set_verbosity(0); + for (int i = 0; i < 5; ++i) { + std::cout << "\t\tPass " << i << ":" << std::endl; + + CountEvents(dv_ind, evt, &val, 1); + double coll_time_sec = static_cast(val.time_running)/kGig; + throughput = (val.value * 32)/coll_time_sec; + std::cout << "\t\t\tCollected events for " << coll_time_sec << + " seconds" << std::endl; + std::cout << "\t\t\tEvents collected: " << val.value << std::endl; + std::cout << "\t\t\tXGMI throughput: " << throughput << + " bytes/second" << std::endl; + std::cout << "\t\t\tXGMI Channel Utilization: " << + 100*throughput/static_cast(kVg20Level1Bandwidth*kGigByte) << + "%" << std::endl; + std::cout << "\t\t\t****" << std::endl; + } + set_verbosity(tmp_verbosity); + }; + + + IF_VERB(STANDARD) { + std::cout << "****************************" << std::endl; + std::cout << "Test each event individually" << std::endl; + std::cout << "****************************" << std::endl; + } + for (PerfCntrEvtGrp grp : s_event_groups) { + ret = amdsmi_dev_counter_group_supported(dv_ind, grp.group()); + if (ret == AMDSMI_STATUS_NOT_SUPPORTED) { + continue; + } + + IF_VERB(STANDARD) { + std::cout << "Testing Event Group " << grp.name() << std::endl; + } + if (grp.group() == AMDSMI_EVNT_GRP_XGMI_DATA_OUT) { + utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_0, 0); + utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_1, 1); + utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_2, 2); + utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_3, 3); + utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_4, 4); + utiliz(AMDSMI_EVNT_XGMI_DATA_OUT_5, 5); + } else if (grp.group() == AMDSMI_EVNT_GRP_XGMI) { + utiliz(AMDSMI_EVNT_XGMI_1_BEATS_TX, 1); + utiliz(AMDSMI_EVNT_XGMI_0_BEATS_TX, 0); + } + for (uint32_t evnt = grp.first_evt(); evnt <= grp.last_evt(); ++evnt) { + IF_VERB(STANDARD) { + std::cout << "\tTesting Event Type " << evnt << std::endl; + } + CountEvents(dv_ind, static_cast(evnt), &val); + } + } +} + +void 
+TestPerfCntrReadWrite::testEventsSimultaneously(amdsmi_device_handle dv_ind) { + amdsmi_status_t ret; + amdsmi_counter_value_t val; + uint32_t avail_counters; + + IF_VERB(STANDARD) { + std::cout << "****************************" << std::endl; + std::cout << "Test events simultaneously (device " << + dv_ind << ")" << std::endl; + std::cout << "****************************" << std::endl; + } + + /* This code is a little convoluted. The reason is that it is meant to test + * having multiple events being used at one time, rather than sequentially + * handling 1 event at a time. + */ + for (PerfCntrEvtGrp grp : s_event_groups) { + ret = amdsmi_dev_counter_group_supported(dv_ind, grp.group()); + if (ret == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << "\tEvent Group " << grp.name() << + " is not supported. Skipping." << std::endl; + } + continue; + } + + IF_VERB(STANDARD) { + std::cout << "Testing Event Group " << grp.name() << std::endl; + } + + ret = amdsmi_counter_available_counters_get(dv_ind, grp.group(), + &avail_counters); + IF_VERB(STANDARD) { + std::cout << "Available Counters: " << avail_counters << std::endl; + } + CHK_ERR_ASRT(ret) + + std::shared_ptr evt_handle = + std::shared_ptr( + new amdsmi_event_handle_t[avail_counters]); + + uint32_t tmp, j; + uint32_t num_created = 0; + + for (uint32_t evnt = grp.first_evt(); evnt <= grp.last_evt(); + evnt += avail_counters) { + IF_VERB(STANDARD) { + std::cout << "\tTesting Event Type " << evnt << std::endl; + } + IF_VERB(STANDARD) { + std::cout << "\tCreating events..." << std::endl; + } + for (j = 0; j < avail_counters; ++j) { + tmp = static_cast(evnt + j); + + if (tmp > grp.last_evt()) { + break; + } + + IF_VERB(STANDARD) { + std::cout << "\tEvent Type " << tmp << std::endl; + } + + ret = amdsmi_dev_counter_create(dv_ind, + static_cast(tmp), &evt_handle.get()[j]); + CHK_ERR_ASRT(ret) + } + num_created = j; + IF_VERB(STANDARD) { + std::cout << "\tStart Counters..." 
<< std::endl; + } + uint32_t tmp_cntrs; + + for (j = 0; j < num_created; ++j) { + tmp = static_cast(evnt + j); + + ret = amdsmi_counter_control(evt_handle.get()[j], AMDSMI_CNTR_CMD_START, + nullptr); + CHK_ERR_ASRT(ret) + + ret = amdsmi_counter_available_counters_get(dv_ind, grp.group(), + &tmp_cntrs); + CHK_ERR_ASRT(ret) + ASSERT_EQ(tmp_cntrs, (avail_counters - j - 1)); + } + + sleep(1); + + IF_VERB(STANDARD) { + std::cout << "\tRead Counters..." << std::endl; + } + for (j = 0; j < num_created; ++j) { + tmp = static_cast(evnt + j); + + ret = amdsmi_counter_read(evt_handle.get()[j], &val); + CHK_ERR_ASRT(ret) + + IF_VERB(STANDARD) { + std::cout << "\tCounter: " << tmp << std::endl; + std::cout << "\tSuccessfully read value: " << std::endl; + std::cout << "\t\tValue: " << val.value << std::endl; + std::cout << "\t\tTime Enabled: " << val.time_enabled << std::endl; + std::cout << "\t\tTime Running: " << val.time_running << std::endl; + } + } + for (j = 0; j < num_created; ++j) { + ret = amdsmi_dev_counter_destroy(evt_handle.get()[j]); + CHK_ERR_ASRT(ret) + } + } + } +} + +void TestPerfCntrReadWrite::Run(void) { + TestBase::Run(); + if (setup_failed_) { + std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl; + return; + } + + for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) { + amdsmi_device_handle dev_handle = device_handles_[dv_ind]; + PrintDeviceHeader(dev_handle); + try { + testEventsIndividually(dev_handle); + testEventsSimultaneously(dev_handle); + } catch(amdsmi_status_t r) { + switch (r) { + case AMDSMI_STATUS_NOT_SUPPORTED: + std::cout << "The performance counter event tried is not " + "supported for this device" << std::endl; + break; + + default: + std::cout << "Unexpected exception caught with amdsmi " + "return value of " << r << std::endl; + } + } catch(...) 
{ + ASSERT_FALSE("Unexpected exception caught"); + } + } +} diff --git a/projects/amdsmi/tests/amd_smi_test/functional/perf_cntr_read_write.h b/projects/amdsmi/tests/amd_smi_test/functional/perf_cntr_read_write.h new file mode 100755 index 0000000000..771430f11e --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/perf_cntr_read_write.h @@ -0,0 +1,101 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ +#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_PERF_CNTR_READ_WRITE_H_ +#define TESTS_AMD_SMI_TEST_FUNCTIONAL_PERF_CNTR_READ_WRITE_H_ + +#include + +#include "amd_smi_test/test_base.h" + +class TestPerfCntrReadWrite : public TestBase { + public: + TestPerfCntrReadWrite(); + + // @Brief: Destructor for test case of TestPerfCntrReadWrite + virtual ~TestPerfCntrReadWrite(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); + + private: + void CountEvents(amdsmi_device_handle dv_ind, + amdsmi_event_type_t evnt, amdsmi_counter_value_t *val, + int32_t sleep_sec = 1); + void testEventsIndividually(amdsmi_device_handle dv_ind); + void testEventsSimultaneously(amdsmi_device_handle dv_ind); +}; + +class PerfCntrEvtGrp { + public: + explicit PerfCntrEvtGrp(amdsmi_event_group_t grp, + uint32_t first, uint32_t last, std::string name); + ~PerfCntrEvtGrp(); + + amdsmi_event_group_t group(void) const { return grp_;} + uint32_t first_evt(void) const {return first_evt_;} + uint32_t last_evt(void) const {return last_evt_;} + uint32_t num_events(void) const {return num_events_;} + std::string name(void) const { return name_;} + private: + amdsmi_event_group_t grp_; + 
uint32_t first_evt_; + uint32_t last_evt_; + uint32_t num_events_; + std::string name_; +}; + +#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_PERF_CNTR_READ_WRITE_H_ diff --git a/projects/amdsmi/tests/amd_smi_test/functional/process_info_read.cc b/projects/amdsmi/tests/amd_smi_test/functional/process_info_read.cc new file mode 100755 index 0000000000..30e7046df4 --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/process_info_read.cc @@ -0,0 +1,230 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ + +#include +#include + +#include +#include + +#include "gtest/gtest.h" +#include "amd_smi.h" +#include "amd_smi_test/functional/process_info_read.h" +#include "amd_smi_test/test_common.h" + +TestProcInfoRead::TestProcInfoRead() : TestBase() { + set_title("AMDSMI Process Info Read Test"); + set_description("This test verifies that process information such as the " + "process ID, PASID, etc. can be read properly."); +} + +TestProcInfoRead::~TestProcInfoRead(void) { +} + +void TestProcInfoRead::SetUp(void) { + TestBase::SetUp(); + + return; +} + +void TestProcInfoRead::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestProcInfoRead::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestProcInfoRead::Close() { + // This will close handles opened within amdsmitst utility calls and call + // amdsmi_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +static void dumpProcess(amdsmi_process_info_t *p) { + assert(p != nullptr); + std::cout << "\t** ProcessID: " << p->process_id << " "; + std::cout << "PASID: " << p->pasid << " "; + std::cout << std::endl; +} +void TestProcInfoRead::Run(void) { + amdsmi_status_t err; + uint32_t num_proc_found; + uint32_t val_ui32; + amdsmi_process_info_t *procs = nullptr; + + TestBase::Run(); + if (setup_failed_) { + std::cout << "** SetUp Failed for this test. 
Skipping.**" << std::endl; + return; + } + + uint32_t num_devices = num_monitor_devs(); + CHK_ERR_ASRT(err) + + err = amdsmi_compute_process_info_get(nullptr, &num_proc_found); + if (err != AMDSMI_STATUS_SUCCESS) { + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << "\t**Process info. read: Not supported on this machine" + << std::endl; + return; + } + } else { + CHK_ERR_ASRT(err) + } + } else { + IF_VERB(STANDARD) { + std::cout << "\t** " << std::dec << num_proc_found << + " GPU processes found" << std::endl; + } + } + + if (num_proc_found == 0) { + return; + } + + procs = new amdsmi_process_info_t[num_proc_found]; + + val_ui32 = num_proc_found; + err = amdsmi_compute_process_info_get(procs, &val_ui32); + if (err != AMDSMI_STATUS_SUCCESS) { + if (err == AMDSMI_STATUS_INSUFFICIENT_SIZE) { + IF_VERB(STANDARD) { + std::cout << "\t** " << val_ui32 << + " processes were read, but more became available that were unread." + << std::endl; + for (uint32_t i = 0; i < val_ui32; ++i) { + dumpProcess(&procs[i]); + } + + return; + } + } else { + CHK_ERR_ASRT(err) + } + } else { + IF_VERB(STANDARD) { + std::cout << "\t** Processes currently using GPU: " << std::endl; + for (uint32_t i = 0; i < val_ui32; ++i) { + dumpProcess(&procs[i]); + } + } + } + + // Reset to the number we actually read + num_proc_found = val_ui32; + if (num_proc_found) { + // Allocate the max we expect to get + uint32_t *dev_inds = new uint32_t[num_devices]; + uint32_t amt_allocd = num_devices; + + for (uint32_t j = 0; j < num_proc_found; j++) { + err = amdsmi_compute_process_gpus_get(procs[j].process_id, dev_inds, + &amt_allocd); + if (err == AMDSMI_STATUS_NOT_FOUND) { + std::cout << "\t** Process " << procs[j].process_id << + " is no longer present."; + continue; + } else { + CHK_ERR_ASRT(err); + ASSERT_LE(amt_allocd, num_devices); + } + std::cout << "\t** Process " << procs[j].process_id << + " is using devices with indices: "; + uint32_t i; + if (amt_allocd > 0) { + for (i = 
0; i < amt_allocd - 1; ++i) { + std::cout << dev_inds[i] << ", "; + } + std::cout << dev_inds[i]; + } + std::cout << std::endl; + // Reset amt_allocd back to the amount acutally allocated + amt_allocd = num_devices; + } + + delete []dev_inds; + + amdsmi_process_info_t proc_info; + for (uint32_t j = 0; j < num_proc_found; j++) { + memset(&proc_info, 0x0, sizeof(amdsmi_process_info_t)); + err = amdsmi_compute_process_info_by_pid_get(procs[j].process_id, + &proc_info); + if (err == AMDSMI_STATUS_NOT_FOUND) { + std::cout << + "\t** WARNING: amdsmi_compute_process_info_get() found process " << + procs[j].process_id << ", but subsequently, " + "amdsmi_compute_process_info_by_pid_get() did not" + " find this same process." << std::endl; + } else { + CHK_ERR_ASRT(err) + ASSERT_EQ(proc_info.process_id, procs[j].process_id); + ASSERT_EQ(proc_info.pasid, procs[j].pasid); + std::cout << "\t** Process ID: " << + procs[j].process_id << " VRAM Usage: " << + proc_info.vram_usage << + " SDMA Usage: " << + proc_info.sdma_usage << + " Compute Unit Usage: " << + proc_info.cu_occupancy << + std::endl; + } + } + } + if (num_proc_found > 1) { + amdsmi_process_info_t tmp_proc; + val_ui32 = 1; + err = amdsmi_compute_process_info_get(&tmp_proc, &val_ui32); + + if (err != AMDSMI_STATUS_INSUFFICIENT_SIZE) { + std::cout << "Expected amdsmi_compute_process_info_get() to tell us" + " there are more processes available, but instead go return code " << + err << std::endl; + } + } + delete []procs; +} diff --git a/projects/amdsmi/tests/amd_smi_test/functional/process_info_read.h b/projects/amdsmi/tests/amd_smi_test/functional/process_info_read.h new file mode 100755 index 0000000000..97b3afb239 --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/process_info_read.h @@ -0,0 +1,73 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ +#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_ +#define TESTS_AMD_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_ + +#include "amd_smi_test/test_base.h" + +class TestProcInfoRead : public TestBase { + public: + TestProcInfoRead(); + + // @Brief: Destructor for test case of TestProcInfoRead + virtual ~TestProcInfoRead(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_ diff --git a/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.cc b/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.cc new file mode 100755 index 0000000000..315f06a015 --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.cc @@ -0,0 +1,149 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include <stdint.h> +#include <stddef.h> + +#include <iostream> + +#include "gtest/gtest.h" +#include "amd_smi.h" +#include "amd_smi_test/functional/xgmi_read_write.h" +#include "amd_smi_test/test_common.h" + +// Sets this test's title and description. +TestXGMIReadWrite::TestXGMIReadWrite() : TestBase() { + set_title("AMDSMI XGMI Read/Write Test"); + set_description("This test verifies that XGMI error counts can be read" + " properly, and that the count can be reset."); +} + +TestXGMIReadWrite::~TestXGMIReadWrite(void) { +} + +void TestXGMIReadWrite::SetUp(void) { + TestBase::SetUp(); +} + +void TestXGMIReadWrite::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestXGMIReadWrite::DisplayResults(void) const { + TestBase::DisplayResults(); +} + +void TestXGMIReadWrite::Close() { + // This will close handles opened within rsmitst utility calls and call + // amdsmi_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + + +// For each monitored device: query XGMI info, read the XGMI error status, +// check the nullptr-argument return code, then reset the error status. +void TestXGMIReadWrite::Run(void) { + amdsmi_status_t err; + amdsmi_xgmi_status_t err_stat; + + TestBase::Run(); + if (setup_failed_) { + IF_VERB(STANDARD) { + std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl; + } + return; + } + + for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) { + auto device = device_handles_[dv_ind]; + PrintDeviceHeader(device); + + amdsmi_xgmi_info_t info; + err = amdsmi_get_xgmi_info(device, &info); + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + std::cout << + "\t**amdsmi_get_xgmi_info() is not supported" + " on this machine" << std::endl; + continue; + } else { + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + // std::hex is sticky: switch back to decimal for later output. + std::cout << "\t**XGMI Hive ID : " << std::hex << + info.xgmi_hive_id << std::dec << std::endl; + } + } + + err = amdsmi_dev_xgmi_error_status(device, &err_stat); + + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << "\t**XGMI Error Status: Not supported on this machine" + << std::endl; + } + // Verify api support checking functionality is working + err = amdsmi_dev_xgmi_error_status(device, nullptr); + ASSERT_EQ(err, AMDSMI_STATUS_NOT_SUPPORTED); + + continue; + } + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << "\t**XGMI Error Status: " << + static_cast<uint32_t>(err_stat) << std::endl; + } + // Verify api support checking functionality is working + err = amdsmi_dev_xgmi_error_status(device, nullptr); + ASSERT_EQ(err, AMDSMI_STATUS_INVAL); + + // TODO(cfree) We need to find a way to generate xgmi errors so this + // test won't be meaningless + err = amdsmi_dev_xgmi_error_reset(device); + CHK_ERR_ASRT(err) + IF_VERB(STANDARD) { + std::cout << "\t**Successfully reset XGMI Error Status: " << std::endl; + } + } +} diff --git a/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.h b/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.h new file mode 100755 index 0000000000..d54ad0988f --- /dev/null +++ b/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.h @@ -0,0 +1,73 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * 
============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of <Name of Development Group, Name of Institution>, + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ +#ifndef TESTS_AMD_SMI_TEST_FUNCTIONAL_XGMI_READ_WRITE_H_ +#define TESTS_AMD_SMI_TEST_FUNCTIONAL_XGMI_READ_WRITE_H_ + +#include "amd_smi_test/test_base.h" + +class TestXGMIReadWrite : public TestBase { + public: + TestXGMIReadWrite(); + + // @Brief: Destructor for test case of TestXGMIReadWrite + virtual ~TestXGMIReadWrite(); + + // @Brief: Set up the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrieve the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_AMD_SMI_TEST_FUNCTIONAL_XGMI_READ_WRITE_H_ diff --git a/projects/amdsmi/tests/amd_smi_test/main.cc b/projects/amdsmi/tests/amd_smi_test/main.cc index 0be9e57466..ff5124498a 100644 --- a/projects/amdsmi/tests/amd_smi_test/main.cc +++ b/projects/amdsmi/tests/amd_smi_test/main.cc @@ -5,7 +5,7 @@ * The University of Illinois/NCSA * Open Source License (NCSA) * - * Copyright (c) 2018, Advanced Micro Devices, Inc. + * Copyright (c) 2022, Advanced Micro Devices, Inc. * All rights reserved. 
* * Developed by: @@ -56,6 +56,11 @@ #include "functional/fan_read.h" #include "functional/fan_read_write.h" #include "functional/evt_notif_read_write.h" +#include "functional/perf_cntr_read_write.h" +#include "amd_smi_test/functional/hw_topology_read.h" +#include "functional/xgmi_read_write.h" +#include "functional/api_support_read.h" +#include "functional/process_info_read.h" /* #include "functional/temp_read.h" #include "functional/volt_read.h" @@ -76,14 +81,9 @@ #include "functional/err_cnt_read.h" #include "functional/mem_util_read.h" #include "functional/id_info_read.h" -#include "functional/perf_cntr_read_write.h" -#include "functional/process_info_read.h" -#include "functional/xgmi_read_write.h" #include "functional/mem_page_info_read.h" -#include "functional/api_support_read.h" #include "functional/mutual_exclusion.h" #include "functional/init_shutdown_refcount.h" -#include "amd_smi_test/functional/hw_topology_read.h" #include "amd_smi_test/functional/gpu_metrics_read.h" #include "amd_smi_test/functional/metrics_counter_read.h" #include "amd_smi_test/functional/perf_determinism.h" @@ -157,6 +157,26 @@ TEST(amdsmitstReadWrite, TestEvtNotifReadWrite) { TestEvtNotifReadWrite tst; RunGenericTest(&tst); } +TEST(amdsmitstReadWrite, TestPerfCntrReadWrite) { + TestPerfCntrReadWrite tst; + RunGenericTest(&tst); +} +TEST(amdsmitstReadWrite, TestXGMIReadWrite) { + TestXGMIReadWrite tst; + RunGenericTest(&tst); +} +TEST(amdsmitstReadOnly, TestAPISupportRead) { + TestAPISupportRead tst; + RunGenericTest(&tst); +} +TEST(amdsmitstReadOnly, TestHWTopologyRead) { + TestHWTopologyRead tst; + RunGenericTest(&tst); +} +TEST(amdsmitstReadOnly, TestProcInfoRead) { + TestProcInfoRead tst; + RunGenericTest(&tst); +} /* TEST(amdsmitstReadOnly, TempRead) { TestTempRead tst; @@ -230,18 +250,6 @@ TEST(amdsmitstReadOnly, TestIdInfoRead) { TestIdInfoRead tst; RunGenericTest(&tst); } -TEST(amdsmitstReadWrite, TestPerfCntrReadWrite) { - TestPerfCntrReadWrite tst; - RunGenericTest(&tst); 
-} -TEST(amdsmitstReadOnly, TestProcInfoRead) { - TestProcInfoRead tst; - RunGenericTest(&tst); -} -TEST(amdsmitstReadOnly, TestHWTopologyRead) { - TestHWTopologyRead tst; - RunGenericTest(&tst); -} TEST(amdsmitstReadOnly, TestGpuMetricsRead) { TestGpuMetricsRead tst; RunGenericTest(&tst); @@ -254,18 +262,11 @@ TEST(amdsmitstReadWrite, TestPerfDeterminism) { TestPerfDeterminism tst; RunGenericTest(&tst); } -TEST(amdsmitstReadWrite, TestXGMIReadWrite) { - TestXGMIReadWrite tst; - RunGenericTest(&tst); -} TEST(amdsmitstReadOnly, TestMemPageInfoRead) { TestMemPageInfoRead tst; RunGenericTest(&tst); } -TEST(amdsmitstReadOnly, TestAPISupportRead) { - TestAPISupportRead tst; - RunGenericTest(&tst); -} + TEST(amdsmitstReadOnly, TestMutualExclusion) { TestMutualExclusion tst; SetFlags(&tst); @@ -274,6 +275,7 @@ TEST(amdsmitstReadOnly, TestMutualExclusion) { tst.Run(); RunCustomTestEpilog(&tst); } + TEST(amdsmitstReadOnly, TestConcurrentInit) { TestConcurrentInit tst; SetFlags(&tst);