diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h index 941f877260..d3963e6040 100644 --- a/include/rdc/rdc.h +++ b/include/rdc/rdc.h @@ -160,9 +160,13 @@ typedef enum { INTEGER = 0, DOUBLE, STRING, BLOB } rdc_field_type_t; typedef enum { RDC_FI_INVALID = 0, //!< Invalid field value //!< @brief Identifier fields - RDC_FI_GPU_COUNT = 1, //!< GPU count in the system - RDC_FI_DEV_NAME, //!< Name of the device - RDC_FI_OAM_ID, //!< OAM ID of the device + RDC_FI_GPU_COUNT = 1, //!< GPU count in the system + RDC_FI_DEV_NAME, //!< Name of the device + RDC_FI_OAM_ID, //!< OAM ID of the device + RDC_FI_DEV_ID, //!< Device ID + RDC_FI_REV_ID, //!< Revision ID + RDC_FI_TARGET_GRAPHICS_VERSION, //!< Target graphics version + RDC_FI_NUM_OF_COMPUTE_UNITS, //!< Number of compute units /** * @brief Frequency related fields @@ -388,6 +392,9 @@ typedef uint32_t rdc_field_grp_t; //!< Field group ID type */ typedef struct { char device_name[RDC_MAX_STR_LENGTH]; //!< Name of the device. + uint64_t device_id; //!< The device id of a GPU + uint32_t num_of_compute_units; //!< Number of compute units + uint64_t target_graphics_version; //!< Target graphics version } rdc_device_attributes_t; /** @@ -533,7 +540,8 @@ typedef enum { RDC_DIAG_RVS_MEMBW_TEST, //!< RVS bandwidth test RDC_DIAG_RVS_H2DD2H_TEST, //!< RVS Host<->Device transfer speed test RDC_DIAG_RVS_IET_TEST, //!< RVS IET test - RDC_DIAG_TEST_LAST = RDC_DIAG_RVS_IET_TEST + RDC_DIAG_RVS_CUSTOM, //!< RVS custom test + RDC_DIAG_TEST_LAST, } rdc_diag_test_cases_t; /** @@ -547,7 +555,7 @@ typedef enum { /** * @brief The maximum test cases to run */ -#define MAX_TEST_CASES (RDC_DIAG_TEST_LAST - RDC_DIAG_TEST_FIRST + 1) +#define MAX_TEST_CASES (RDC_DIAG_TEST_LAST - RDC_DIAG_TEST_FIRST) /** * @brief The maximum length of the diagnostic messages @@ -1607,6 +1615,8 @@ rdc_status_t rdc_config_get(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, */ rdc_status_t rdc_config_clear(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id); +const char* get_rocm_path(const char* search_string); + #ifdef __cplusplus } 
#endif // __cplusplus diff --git a/include/rdc_modules/rdc_rvs/RvsBase.h b/include/rdc_modules/rdc_rvs/RvsBase.h index 6e5dae6c01..062321724d 100644 --- a/include/rdc_modules/rdc_rvs/RvsBase.h +++ b/include/rdc_modules/rdc_rvs/RvsBase.h @@ -22,61 +22,96 @@ THE SOFTWARE. #ifndef RDC_MODULES_RDC_RVS_RVSBASE_H_ #define RDC_MODULES_RDC_RVS_RVSBASE_H_ +#include + #include #include +#include #include #include +#include #include "rdc/rdc.h" +#include "rdc_lib/RdcLogger.h" #include "rvs/rvs.h" static constexpr size_t MAX_CONFIG_LENGTH = 1024; -// NOTE: There MUST be a space after : -static const std::map test_to_conf = { - // derived from conf/gst_single.conf - {RDC_DIAG_RVS_GST_TEST, - "{actions: [{name: gpustress-9000-sgemm-false, device: all, " - "device_index: '0', module: gst, parallel: false, count: 1, duration: " - "10000, copy_matrix: false, target_stress: 9000, matrix_size_a: 8640, " - "matrix_size_b: 8640, matrix_size_c: 8640, ops_type: sgemm, lda: 8640, " - "ldb: 8640, ldc: 8640}]}"}, - // derived from conf/MI300X/babel.conf - {RDC_DIAG_RVS_MEMBW_TEST, - "{actions: [{name: babel-float-256MiB," - "device: all, module: babel, " - "parallel: false, count: 1, num_iter: 5000, array_size: 268435456, " - "test_type: 1, mibibytes: true, o/p_csv: false, subtest: 5}]}"}, - // derived from conf/MI300X/pebb_single.conf - {RDC_DIAG_RVS_H2DD2H_TEST, - "{actions: [{name: h2d-d2h-sequential-64MB," - "device: all, module: pebb, duration: 120000, device_to_host: true, " - "host_to_device: true, parallel: false, block_size: 67108864, " - "link_type: 2, warm_calls: 10, hot_calls: 100, b2b: true}]}"}, - // derived from conf/MI300X/iet_single.conf - {RDC_DIAG_RVS_IET_TEST, - "{actions: [{name: iet-400W-1K-rand-dgemm," - "device: all, module: iet, parallel: true, duration: 60000, " - "sample_interval: 3000, target_power: 400, matrix_size: 1024, " - "matrix_init: rand, ops_type: dgemm}]}"}, +// this map only makes sense in context of test config locations as originally +// designed in 
RVS +static const std::map gfx_to_rvs_conf = { + {0x90a, "MI210"}, // ? + {0x940, "MI300A"}, // ? + {0x941, "MI300A"}, // ? + {0x942, "MI300X"}, // ? + {0x94a, "MI308X"}, // ? + {0x1030, "nv21"}, // + {0x1031, "nv21"}, // ? + {0x1032, "nv21"}, // ? + {0x1033, "nv21"}, // ? + {0x1034, "nv21"}, // ? + {0x1035, "nv21"}, // ? + {0x1100, "nv31"}, // ? + {0x1101, "nv31"}, // ? + {0x1102, "nv31"}, // ? + {0x1103, "nv31"}, // ? }; static const std::map test_to_name = { - {RDC_DIAG_RVS_GST_TEST, "RVS_GST_TEST"}, - {RDC_DIAG_RVS_MEMBW_TEST, "RVS_MEMBW_TEST"}, - {RDC_DIAG_RVS_H2DD2H_TEST, "RVS_H2DD2H_TEST"}, - {RDC_DIAG_RVS_IET_TEST, "RVS_IET_TEST"}, + {RDC_DIAG_RVS_GST_TEST, "gst_single.conf"}, {RDC_DIAG_RVS_MEMBW_TEST, "babel.conf"}, + {RDC_DIAG_RVS_H2DD2H_TEST, "pebb_single.conf"}, {RDC_DIAG_RVS_IET_TEST, "iet_stress.conf"}, + {RDC_DIAG_RVS_CUSTOM, "CUSTOM_CONFIG"}, }; namespace amd { namespace rdc { +inline amdsmi_status_t get_processor_handle_from_id(uint32_t gpu_id, + amdsmi_processor_handle* processor_handle) { + uint32_t socket_count; + uint32_t processor_count; + auto ret = amdsmi_get_socket_handles(&socket_count, nullptr); + if (ret != AMDSMI_STATUS_SUCCESS) { + return ret; + } + std::vector sockets(socket_count); + std::vector all_processors{}; + ret = amdsmi_get_socket_handles(&socket_count, sockets.data()); + for (auto& socket : sockets) { + ret = amdsmi_get_processor_handles(socket, &processor_count, nullptr); + if (ret != AMDSMI_STATUS_SUCCESS) { + return ret; + } + std::vector processors(processor_count); + ret = amdsmi_get_processor_handles(socket, &processor_count, processors.data()); + if (ret != AMDSMI_STATUS_SUCCESS) { + return ret; + } + + for (auto& processor : processors) { + processor_type_t processor_type = {}; + ret = amdsmi_get_processor_type(processor, &processor_type); + if (processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU) { + RDC_LOG(RDC_ERROR, "Expect AMD_GPU device type!"); + return AMDSMI_STATUS_NOT_SUPPORTED; + } + 
all_processors.push_back(processor); + } + } + + if (gpu_id >= all_processors.size()) { + return AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS; + } + + // Get processor handle from GPU id + *processor_handle = all_processors[gpu_id]; + + return AMDSMI_STATUS_SUCCESS; +} + class RdcRVSBase { public: - RdcRVSBase() { s_instance = this; }; - ~RdcRVSBase() { - if (s_instance == this) { - s_instance = nullptr; - } - }; + RdcRVSBase(); + + ~RdcRVSBase(); // only one instance allowed RdcRVSBase(const RdcRVSBase&) = delete; @@ -87,12 +122,16 @@ class RdcRVSBase { RdcRVSBase& operator=(RdcRVSBase&&) = delete; rvs_status_t run_rvs_app(const char* config, size_t config_size, rdc_diag_callback_t* callback); + std::vector get_rvs_configs(); + std::map get_test_to_conf(); private: static RdcRVSBase* s_instance; volatile rvs_session_state_t _state = RVS_SESSION_STATE_IDLE; rdc_diag_callback_t* _callback = nullptr; rvs_session_callback _rvs_callback = nullptr; + std::vector _rvs_config_list = {}; + std::map _test_to_conf = {}; // Static callback function that the C API will call static void static_callback(rvs_session_id_t session_id, const rvs_results_t* results) { @@ -101,7 +140,7 @@ class RdcRVSBase { s_instance->session_callback(session_id, results); } } - void session_callback(rvs_session_id_t session_id, const rvs_results_t* results) { + void session_callback(rvs_session_id_t /*session_id*/, const rvs_results_t* results) { _state = results->state; // std::string output = "\n"; // output += "session id -> " + std::to_string(session_id) + "\n"; diff --git a/rdc_libs/bootstrap/src/RdcBootStrap.cc b/rdc_libs/bootstrap/src/RdcBootStrap.cc index 3520041de9..63751b87b5 100644 --- a/rdc_libs/bootstrap/src/RdcBootStrap.cc +++ b/rdc_libs/bootstrap/src/RdcBootStrap.cc @@ -21,6 +21,7 @@ THE SOFTWARE. 
*/ #include #include +#include #include @@ -476,6 +477,7 @@ char* strncpy_with_null(char* dest, const char* src, size_t n) { return dest; } + rdc_status_t rdc_policy_set(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_policy_t policy) { if (!p_rdc_handle) { @@ -532,4 +534,43 @@ rdc_status_t rdc_link_status_get(rdc_handle_t p_rdc_handle, rdc_link_status_t* r } return static_cast(p_rdc_handle) ->rdc_link_status_get(results); -} \ No newline at end of file +} + +const char * get_rocm_path(const char * search_string) { + // set default rocm path in case lookup fails + static std::string rocm_path("/opt/rocm"); + const char* rocm_path_env = getenv("ROCM_PATH"); + if (rocm_path_env != nullptr) { + rocm_path = rocm_path_env; + } + + std::ifstream file("/proc/self/maps"); + + if (!file.is_open()) { + RDC_LOG(RDC_DEBUG, "CANT OPEN FILE"); + return rocm_path.c_str(); + } + + std::string line; + while (getline(file, line)) { + size_t index_end = line.find(search_string); + size_t index_start = index_end; + if (index_end == std::string::npos) { + // no library on this line + continue; + } + // walk index backwards until it reaches a space + while ((index_start > 0) && (line[index_start - 1] != ' ')) { + index_start--; + } + // extract library path, drop library name + rocm_path = line.substr(index_start, index_end - index_start); + // appending "../" should result in "/opt/rocm/lib/.." 
or similar + rocm_path += ".."; + RDC_LOG(RDC_DEBUG, "FOUND SOMETHING!"); + return rocm_path.c_str(); + } + + return rocm_path.c_str(); +} + diff --git a/rdc_libs/rdc/src/RdcDiagnosticModule.cc b/rdc_libs/rdc/src/RdcDiagnosticModule.cc index 1da5a27f57..5fdd52c4cb 100644 --- a/rdc_libs/rdc/src/RdcDiagnosticModule.cc +++ b/rdc_libs/rdc/src/RdcDiagnosticModule.cc @@ -49,9 +49,12 @@ rdc_status_t RdcDiagnosticModule::rdc_diag_test_cases_query( return RDC_ST_OK; } -rdc_status_t RdcDiagnosticModule::rdc_test_case_run( - rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, - const char* config, size_t config_size, rdc_diag_test_result_t* result, rdc_diag_callback_t* callback) { +rdc_status_t RdcDiagnosticModule::rdc_test_case_run(rdc_diag_test_cases_t test_case, + uint32_t gpu_index[RDC_MAX_NUM_DEVICES], + uint32_t gpu_count, const char* config, + size_t config_size, + rdc_diag_test_result_t* result, + rdc_diag_callback_t* callback) { if (result == nullptr) { return RDC_ST_BAD_PARAMETER; } @@ -74,24 +77,41 @@ rdc_status_t RdcDiagnosticModule::rdc_diagnostic_run(const rdc_group_info_t& gpu size_t config_size, rdc_diag_response_t* response, rdc_diag_callback_t* callback) { + const bool is_custom = config != nullptr && config_size != 0; + if (response == nullptr) { return RDC_ST_BAD_PARAMETER; } - std::vector rdc_runs; + std::vector tests_to_search_for; if (level >= RDC_DIAG_LVL_SHORT) { // Short run and above - rdc_runs.push_back(RDC_DIAG_COMPUTE_PROCESS); - rdc_runs.push_back(RDC_DIAG_NODE_TOPOLOGY); - rdc_runs.push_back(RDC_DIAG_GPU_PARAMETERS); - rdc_runs.push_back(RDC_DIAG_COMPUTE_QUEUE); - rdc_runs.push_back(RDC_DIAG_SYS_MEM_CHECK); + tests_to_search_for.push_back(RDC_DIAG_COMPUTE_PROCESS); + tests_to_search_for.push_back(RDC_DIAG_NODE_TOPOLOGY); + tests_to_search_for.push_back(RDC_DIAG_GPU_PARAMETERS); + tests_to_search_for.push_back(RDC_DIAG_COMPUTE_QUEUE); + tests_to_search_for.push_back(RDC_DIAG_SYS_MEM_CHECK); } if (level 
>= RDC_DIAG_LVL_MED) { // Medium run and above - rdc_runs.push_back(RDC_DIAG_RVS_GST_TEST); - rdc_runs.push_back(RDC_DIAG_RVS_MEMBW_TEST); - rdc_runs.push_back(RDC_DIAG_RVS_H2DD2H_TEST); - rdc_runs.push_back(RDC_DIAG_RVS_IET_TEST); + tests_to_search_for.push_back(RDC_DIAG_RVS_GST_TEST); + tests_to_search_for.push_back(RDC_DIAG_RVS_MEMBW_TEST); + tests_to_search_for.push_back(RDC_DIAG_RVS_H2DD2H_TEST); + tests_to_search_for.push_back(RDC_DIAG_RVS_IET_TEST); + } + + std::vector tests_to_run; + if (is_custom) { + // respect custom config + tests_to_run.push_back(RDC_DIAG_RVS_CUSTOM); + } else { + // respect level + for (auto& test : tests_to_search_for) { + if (testcases_to_module_.find(test) != testcases_to_module_.end()) { + tests_to_run.push_back(test); + } else { + RDC_LOG(RDC_DEBUG, "test not found: " << test); + } + } } if (callback != nullptr && callback->callback != nullptr && callback->cookie != nullptr) { @@ -99,15 +119,17 @@ rdc_status_t RdcDiagnosticModule::rdc_diagnostic_run(const rdc_group_info_t& gpu callback->callback(callback->cookie, log.data()); } + unsigned int i = 0; response->results_count = 0; - for (unsigned int i = 0; i < rdc_runs.size(); i++) { + for (i = 0; i < tests_to_run.size(); i++) { if (callback != nullptr && callback->callback != nullptr && callback->cookie != nullptr) { - std::string log = "Test " + std::to_string(i) + " / " + std::to_string(rdc_runs.size()); + std::string log = + "Test " + std::to_string(i + 1) + " / " + std::to_string(tests_to_run.size()); callback->callback(callback->cookie, log.data()); } - response->diag_info[i].test_case = rdc_runs[i]; + response->diag_info[i].test_case = tests_to_run[i]; // NOTE: rdc_test_case_run reuses the diagnostic_run callback - rdc_test_case_run(rdc_runs[i], const_cast(gpus.entity_ids), gpus.count, config, + rdc_test_case_run(tests_to_run[i], const_cast(gpus.entity_ids), gpus.count, config, config_size, &(response->diag_info[i]), callback); response->results_count++; } diff --git 
a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc index d4fce4f849..1e62cd454b 100644 --- a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc +++ b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc @@ -432,33 +432,33 @@ rdc_status_t RdcMetricFetcherImpl::bulk_fetch_smi_fields( constexpr double kGig = 1000000000.0; static uint64_t sum_xgmi_read(const amdsmi_gpu_metrics_t& gpu_metrics) { - uint64_t total = 0; - const auto not_supported_metrics_data = std::numeric_limits::max(); - for (int i = 0; i < AMDSMI_MAX_NUM_XGMI_LINKS; ++i) { - if (gpu_metrics.xgmi_read_data_acc[i] == not_supported_metrics_data){ - continue; - } - total += gpu_metrics.xgmi_read_data_acc[i]; + uint64_t total = 0; + const auto not_supported_metrics_data = std::numeric_limits::max(); + for (int i = 0; i < AMDSMI_MAX_NUM_XGMI_LINKS; ++i) { + if (gpu_metrics.xgmi_read_data_acc[i] == not_supported_metrics_data) { + continue; } - if (total == 0){ - return not_supported_metrics_data; - } - return total; + total += gpu_metrics.xgmi_read_data_acc[i]; + } + if (total == 0) { + return not_supported_metrics_data; + } + return total; } static uint64_t sum_xgmi_write(const amdsmi_gpu_metrics_t& gpu_metrics) { - uint64_t total = 0; - const auto not_supported_metrics_data = std::numeric_limits::max(); - for (int i = 0; i < AMDSMI_MAX_NUM_XGMI_LINKS; ++i) { - if (gpu_metrics.xgmi_write_data_acc[i] == not_supported_metrics_data){ - continue; - } - total += gpu_metrics.xgmi_write_data_acc[i]; + uint64_t total = 0; + const auto not_supported_metrics_data = std::numeric_limits::max(); + for (int i = 0; i < AMDSMI_MAX_NUM_XGMI_LINKS; ++i) { + if (gpu_metrics.xgmi_write_data_acc[i] == not_supported_metrics_data) { + continue; } - if (total == 0){ - return not_supported_metrics_data; - } - return total; + total += gpu_metrics.xgmi_write_data_acc[i]; + } + if (total == 0) { + return not_supported_metrics_data; + } + return total; } rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, 
rdc_field_t field_id, @@ -659,6 +659,7 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field break; } case RDC_FI_DEV_NAME: { + // source values from asic_info amdsmi_asic_info_t asic_info; value->status = amdsmi_get_gpu_asic_info(processor_handle, &asic_info); value->type = STRING; @@ -700,17 +701,44 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field value->value.l_int = num_pages; } break; - case RDC_FI_OAM_ID: { + case RDC_FI_OAM_ID: + case RDC_FI_DEV_ID: + case RDC_FI_REV_ID: + case RDC_FI_TARGET_GRAPHICS_VERSION: + case RDC_FI_NUM_OF_COMPUTE_UNITS: { amdsmi_asic_info_t asic_info; value->status = amdsmi_get_gpu_asic_info(processor_handle, &asic_info); value->type = INTEGER; - if (value->status == AMDSMI_STATUS_SUCCESS) { + if (value->status != AMDSMI_STATUS_SUCCESS) { + break; + } + if (field_id == RDC_FI_OAM_ID) { // 0xFFFF means not supported for OAM ID if (asic_info.oam_id == 0xFFFF) { value->status = AMDSMI_STATUS_NOT_SUPPORTED; } else { value->value.l_int = asic_info.oam_id; } + } else if (field_id == RDC_FI_DEV_ID) { + value->value.l_int = asic_info.device_id; + } else if (field_id == RDC_FI_REV_ID) { + value->value.l_int = asic_info.rev_id; + } else if (field_id == RDC_FI_TARGET_GRAPHICS_VERSION) { + if (asic_info.target_graphics_version == 0xFFFFFFFFFFFFFFFF) { + value->status = AMDSMI_STATUS_NOT_SUPPORTED; + } else { + value->value.l_int = asic_info.target_graphics_version; + } + } else if (field_id == RDC_FI_NUM_OF_COMPUTE_UNITS) { + if (asic_info.num_of_compute_units == 0xFFFFFFFF) { + value->status = AMDSMI_STATUS_NOT_SUPPORTED; + } else { + value->value.l_int = asic_info.num_of_compute_units; + } + } else { + // this should never happen as all fields are handled above + RDC_LOG(RDC_ERROR, "Unexpected field id: " << field_id); + value->status = AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS; } break; } @@ -726,7 +754,7 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, 
rdc_field uint64_t timestamp; value->status = amdsmi_get_utilization_count(processor_handle, utilization_counters, - kUTILIZATION_COUNTERS, &timestamp); + kUTILIZATION_COUNTERS, &timestamp); value->type = INTEGER; if (value->status == AMDSMI_STATUS_SUCCESS) { value->value.l_int = static_cast(utilization_counters[0].value); @@ -858,32 +886,29 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field uint32_t num_pages = 0; ret = amdsmi_get_gpu_bad_page_info(processor_handle, &num_pages, nullptr); if (AMDSMI_STATUS_SUCCESS == ret) { - if (RDC_HEALTH_RETIRED_PAGE_NUM == field_id) { - value->status = Smi2RdcError(ret); - value->type = INTEGER; - value->value.l_int = static_cast(num_pages); - break; - } + if (RDC_HEALTH_RETIRED_PAGE_NUM == field_id) { + value->status = Smi2RdcError(ret); + value->type = INTEGER; + value->value.l_int = static_cast(num_pages); + break; + } - if ((0 < num_pages) && - (RDC_HEALTH_PENDING_PAGE_NUM == field_id)) { - std::vector bad_page_info(num_pages); - ret = amdsmi_get_gpu_bad_page_info(processor_handle, &num_pages, - bad_page_info.data()); - value->status = Smi2RdcError(ret); - value->type = INTEGER; - if (AMDSMI_STATUS_SUCCESS == ret) { - uint64_t pending_page_num = 0; - for (uint32_t i=0; i < num_pages; i++) { - if (AMDSMI_MEM_PAGE_STATUS_PENDING == bad_page_info[i].status) - pending_page_num++; - } - - value->value.l_int = static_cast(pending_page_num); + if ((0 < num_pages) && (RDC_HEALTH_PENDING_PAGE_NUM == field_id)) { + std::vector bad_page_info(num_pages); + ret = amdsmi_get_gpu_bad_page_info(processor_handle, &num_pages, bad_page_info.data()); + value->status = Smi2RdcError(ret); + value->type = INTEGER; + if (AMDSMI_STATUS_SUCCESS == ret) { + uint64_t pending_page_num = 0; + for (uint32_t i = 0; i < num_pages; i++) { + if (AMDSMI_MEM_PAGE_STATUS_PENDING == bad_page_info[i].status) pending_page_num++; } + + value->value.l_int = static_cast(pending_page_num); } + } } else - value->status = Smi2RdcError(ret); + 
value->status = Smi2RdcError(ret); break; } diff --git a/rdc_libs/rdc_modules/rdc_rvs/CMakeLists.txt b/rdc_libs/rdc_modules/rdc_rvs/CMakeLists.txt index 0be124cc3a..e96d9109c3 100644 --- a/rdc_libs/rdc_modules/rdc_rvs/CMakeLists.txt +++ b/rdc_libs/rdc_modules/rdc_rvs/CMakeLists.txt @@ -61,4 +61,13 @@ if(BUILD_RVS) TARGET ${RDC_RVS_LIB} POST_BUILD COMMAND ${CMAKE_STRIP} ${RDC_RVS_LIB_COMPONENT}.so) endif() + + # Install RVS config files into /opt/rocm/share/rdc/conf/rvs/ + #file(GLOB RDC_RVS_CONFIG_FILES "${SRC_DIR}/conf/*") + install(DIRECTORY "${SRC_DIR}/conf/" + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${RDC}/conf/rvs/ + COMPONENT ${SERVER_COMPONENT}) + #install(FILES ${RDC_RVS_CONFIG_FILES} + # DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${RDC}/conf + # COMPONENT ${RDC_RVS_LIB_COMPONENT}) endif() diff --git a/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc b/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc index ce0e6e8278..51cb29ef6c 100644 --- a/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc +++ b/rdc_libs/rdc_modules/rdc_rvs/RdcDiagnosticLib.cc @@ -21,15 +21,43 @@ THE SOFTWARE. 
*/ #include +#include +#include + #include "rdc/rdc.h" #include "rdc_lib/RdcDiagnosticLibInterface.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/rdc_common.h" #include "rdc_modules/rdc_rvs/RvsBase.h" -rdc_status_t rdc_diag_init(uint64_t) { return RDC_ST_OK; } +std::unique_ptr rvs_p; -rdc_status_t rdc_diag_destroy() { return RDC_ST_OK; } +bool is_rvs_disabled() { + const char* value = std::getenv("RDC_DISABLE_RVS"); + if (value == nullptr) return false; + + std::string value_str = value; + std::transform(value_str.begin(), value_str.end(), value_str.begin(), + [](unsigned char c) { return std::tolower(c); }); + + const std::vector positive_list = {"yes", "true", "1", "on", "y", "t"}; + + return std::any_of(positive_list.begin(), positive_list.end(), + [&value_str](const char* val) { return value_str == val; }); +} + +rdc_status_t rdc_diag_init(uint64_t) { + if (is_rvs_disabled()) { + return RDC_ST_DISABLED_MODULE; + } + rvs_p = std::unique_ptr(new amd::rdc::RdcRVSBase); + return RDC_ST_OK; +} + +rdc_status_t rdc_diag_destroy() { + rvs_p.reset(); + return RDC_ST_OK; +} rdc_status_t rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cases[MAX_TEST_CASES], uint32_t* test_case_count) { @@ -37,12 +65,11 @@ rdc_status_t rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cases[MAX_TEST return RDC_ST_BAD_PARAMETER; } - *test_case_count = 3; - test_cases[0] = RDC_DIAG_RVS_GST_TEST; - test_cases[1] = RDC_DIAG_RVS_MEMBW_TEST; - test_cases[2] = RDC_DIAG_RVS_H2DD2H_TEST; - // Temporarily disabled due to configuration issues - // test_cases[3] = RDC_DIAG_RVS_IET_TEST; + auto test_to_conf = rvs_p->get_test_to_conf(); + *test_case_count = test_to_conf.size(); + for (auto& [key, value] : test_to_conf) { + *test_cases++ = key; + } return RDC_ST_OK; } @@ -52,12 +79,20 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case, uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count, const char* config, size_t config_size, rdc_diag_test_result_t* result, 
rdc_diag_callback_t* callback) { + const bool is_custom = config != nullptr && config_size != 0; + rvs_status_t rvs_status = RVS_STATUS_SUCCESS; if (result == nullptr || gpu_count == 0) { return RDC_ST_BAD_PARAMETER; } - amd::rdc::RdcRVSBase rvs_base; + if (rvs_p == nullptr) { + RDC_LOG(RDC_ERROR, "rvs_p is not set!"); + return RDC_ST_FAIL_LOAD_MODULE; + } + + // get test_to_conf + auto test_to_conf = rvs_p->get_test_to_conf(); // init the return data *result = {}; @@ -69,23 +104,39 @@ rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case, std::string str = "RVS test [" + test_to_name.at(test_case) + "]"; callback->callback(callback->cookie, str.data()); } + + // if config is given - only run one test and return + // do not care about test_case + if (is_custom) { + rvs_status = rvs_p->run_rvs_app(config, config_size + 1, callback); + if (rvs_status != RVS_STATUS_SUCCESS) { + result->status = RDC_DIAG_RESULT_FAIL; + } + return RDC_ST_OK; + } + switch (test_case) { case RDC_DIAG_RVS_GST_TEST: case RDC_DIAG_RVS_MEMBW_TEST: case RDC_DIAG_RVS_H2DD2H_TEST: case RDC_DIAG_RVS_IET_TEST: { const std::string test_name = "Finished running " + test_to_name.at(test_case); - const std::string predefined_config = test_to_conf.at(test_case); - // +1 to copy null - strncpy_with_null(result->info, test_name.c_str(), test_name.length() + 1); - if (config == nullptr || config_size == 0) { - rvs_status = rvs_base.run_rvs_app(predefined_config.c_str(), predefined_config.length() + 1, - callback); - } else { - rvs_status = rvs_base.run_rvs_app(config, config_size, callback); + if (test_to_conf.find(test_case) == test_to_conf.end()) { + RDC_LOG(RDC_ERROR, "cannot find test " << test_to_name.at(test_case)); + return RDC_ST_NOT_FOUND; } + const std::string predefined_config = test_to_conf.at(test_case); + // +1 to copy null + strncpy_with_null(result->info, test_name.c_str(), test_name.length() + 1); + rvs_status = + rvs_p->run_rvs_app(predefined_config.c_str(), 
predefined_config.length() + 1, callback); break; } + case RDC_DIAG_RVS_CUSTOM: + RDC_LOG(RDC_ERROR, "custom config cannot be bundled with other tests!"); + result->status = RDC_DIAG_RESULT_SKIP; + return RDC_ST_BAD_PARAMETER; + break; default: result->status = RDC_DIAG_RESULT_SKIP; strncpy_with_null(result->info, "Not supported yet", MAX_DIAG_MSG_LENGTH); diff --git a/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc b/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc index 738cdb3866..68b0d24f99 100644 --- a/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc +++ b/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc @@ -23,19 +23,113 @@ THE SOFTWARE. #include +#include + #include "rdc/rdc.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/rdc_common.h" #include "rdc_modules/rdc_rvs/RvsBase.h" #include "rvs/rvs.h" -// TODO: Make generic test -// TODO: Allow for user to override defaults with a custom string +#define CHECK_RVS(STATUS, SESSION) \ + do { \ + static_assert(std::is_same::value || \ + std::is_same::value || \ + std::is_convertible::value, \ + "STATUS must be of type rvs_status_t"); \ + static_assert(std::is_same::value || \ + std::is_same::value, \ + "SESSION must be of type rvs_session_t"); \ + if ((STATUS) != RVS_STATUS_SUCCESS) { \ + RDC_LOG(RDC_ERROR, \ + "RVS failed at[" << __FILE__ << ":" << __LINE__ << "] with status: " << (STATUS)); \ + rvs_session_destroy((SESSION)); \ + return (STATUS); \ + } \ + } while (0) -amd::rdc::RdcRVSBase* amd::rdc::RdcRVSBase::s_instance = nullptr; +namespace amd::rdc { -rvs_status_t amd::rdc::RdcRVSBase::run_rvs_app(const char* config, const size_t config_size, - rdc_diag_callback_t* callback) { +RdcRVSBase* RdcRVSBase::s_instance = nullptr; +RdcRVSBase::RdcRVSBase() { + std::string config_path(get_rocm_path("librdc.so")); + s_instance = this; + // these configs are installed with RDC and are mostly stripped down + // versions of RVS configs + config_path.append("/share/rdc/conf/rvs/"); + amdsmi_processor_handle processor_handle = nullptr; + auto err = 
get_processor_handle_from_id(0, &processor_handle); + if (err != AMDSMI_STATUS_SUCCESS) { + RDC_LOG(RDC_ERROR, "get_processor_handle_from_id failed! " << err); + return; + } + amdsmi_asic_info_t asic_info; + err = amdsmi_get_gpu_asic_info(processor_handle, &asic_info); + if (err != AMDSMI_STATUS_SUCCESS) { + RDC_LOG(RDC_ERROR, "amdsmi_get_gpu_asic_info failed! " << err); + return; + } + + auto found_gpu = gfx_to_rvs_conf.find(asic_info.target_graphics_version); + if (found_gpu == gfx_to_rvs_conf.end()) { + // gpu name is not found + RDC_LOG(RDC_INFO, "RVS couldn't match GFX version to name. Using \"default\""); + config_path.append("default"); + } else { + // gpu name is found - look up the name + config_path.append(found_gpu->second); + } + + RDC_LOG(RDC_DEBUG, "RVS CONFIG PATH: " << config_path); + + // populate configs + for (auto& ent : std::filesystem::directory_iterator(config_path)) { + if (ent.is_regular_file()) { + _rvs_config_list.push_back(ent.path().string()); + } + } + + // map test enums to config paths + for (rdc_diag_test_cases_t i = RDC_DIAG_TEST_FIRST; i < RDC_DIAG_TEST_LAST; + i = static_cast(i + 1)) { + if (test_to_name.find(i) == test_to_name.end()) { + continue; + } + for (int j = 0; j < _rvs_config_list.size(); j++) { + std::filesystem::path config_path(_rvs_config_list.at(j)); + // error handling for path + if (!config_path.has_filename()) { + RDC_LOG(RDC_ERROR, "RVS config path has no filename: " << _rvs_config_list.at(j)); + continue; + } + // strip path, only keep filename + std::string config = config_path.filename().string(); + if (test_to_name.at(i) == config) { + _test_to_conf[i] = config_path.string(); + RDC_LOG(RDC_DEBUG, "TEST_ADDED " << test_to_name.at(i) << " = " << _test_to_conf[i]); + } + } + } + + // manually add custom config + _test_to_conf[RDC_DIAG_RVS_CUSTOM] = ""; + + auto status = rvs_initialize(); + if (status != RVS_STATUS_SUCCESS) { + RDC_LOG(RDC_ERROR, "rvs initialization failed"); + } +}; + 
+RdcRVSBase::~RdcRVSBase() { + if (s_instance == this) { + s_instance = nullptr; + } +}; + +std::vector RdcRVSBase::get_rvs_configs() { return _rvs_config_list; } + +rvs_status_t RdcRVSBase::run_rvs_app(const char* config, const size_t config_size, + rdc_diag_callback_t* callback) { char active_config[MAX_CONFIG_LENGTH]; rvs_session_property_t session_property = {RVS_SESSION_TYPE_DEFAULT_CONF, {{RVS_MODULE_GST}}}; rvs_session_id_t session_id; @@ -44,9 +138,8 @@ rvs_status_t amd::rdc::RdcRVSBase::run_rvs_app(const char* config, const size_t // Meaning RDC index has no impact on RVS index. if ((config == nullptr) || (config_size == 0)) { - RDC_LOG(RDC_INFO, "given config is NULL! Using predefined gst_config"); - strncpy_with_null(active_config, test_to_conf.at(RDC_DIAG_RVS_GST_TEST).c_str(), - test_to_conf.at(RDC_DIAG_RVS_GST_TEST).length()+1); + RDC_LOG(RDC_ERROR, "given config is NULL! Cannot run tests!"); + return RVS_STATUS_INVALID_ARGUMENT; } else if (config_size > MAX_CONFIG_LENGTH) { RDC_LOG(RDC_ERROR, "given config size is too large! 
Expected at most " << MAX_CONFIG_LENGTH << ", got " << config_size << " instead."); @@ -56,12 +149,6 @@ rvs_status_t amd::rdc::RdcRVSBase::run_rvs_app(const char* config, const size_t strncpy_with_null(active_config, config, config_size); } - status = rvs_initialize(); - if (status == RVS_STATUS_FAILED) { - RDC_LOG(RDC_ERROR, "rvs initialization failed"); - return status; - } - /*******************************/ _state = RVS_SESSION_STATE_IDLE; @@ -71,28 +158,34 @@ rvs_status_t amd::rdc::RdcRVSBase::run_rvs_app(const char* config, const size_t _callback = callback; status = rvs_session_create(&session_id, &RdcRVSBase::static_callback); - session_property.type = RVS_SESSION_TYPE_CUSTOM_ACTION; + CHECK_RVS(status, session_id); + + session_property.type = RVS_SESSION_TYPE_CUSTOM_CONF; session_property.custom_action.config = active_config; status = rvs_session_set_property(session_id, &session_property); + CHECK_RVS(status, session_id); + status = rvs_session_execute(session_id); - if (status != RVS_STATUS_SUCCESS) { - RDC_LOG(RDC_ERROR, "RVS session execute failed with status: " << status); - rvs_session_destroy(session_id); - return status; - } + CHECK_RVS(status, session_id); + // TODO: remove? while (_state != RVS_SESSION_STATE_COMPLETED) { }; _callback = nullptr; status = rvs_session_destroy(session_id); - if (status != RVS_STATUS_SUCCESS) { - RDC_LOG(RDC_ERROR, "RVS session destroy failed with status: " << status); - } + // this will try to destroy the session again, but it shouldn't matter + // I don't want to define a second macro. 
+ CHECK_RVS(status, session_id); return status; } + +std::map RdcRVSBase::get_test_to_conf() { + return _test_to_conf; +} +} // namespace amd::rdc diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/babel.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/babel.conf new file mode 100644 index 0000000000..6602ad933b --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/babel.conf @@ -0,0 +1,51 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# BABEL test +# +# Preconditions: +# Set device to all. 
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space (e.g.: device: 50599 3245) +# Set parallel execution to false +# Set buffer size to reflect the buffer you want to test +# Set run count to 1 (test will run once) +# + +actions: +- name: babel-256MiB + device: all + module: babel # Name of the module + parallel: true # Parallel true or false + count: 1 # Number of times you want to repeat the test from the beginning (a clean start every time) + num_iter: 5000 # Number of iterations; this many kernels are launched simultaneously to stress the system + array_size: 268435456 # Buffer size the test operates on; this is 256 MiB + test_type: 1 # type of test, 1: Float, 2: Double, 3: Triad float, 4: Triad double + mibibytes: true # mebibytes (MiB) or megabytes (MB), true for MiB + o/p_csv: false # o/p as csv file + subtest: 5 # 1: copy 2: copy+mul 3: copy+mul+add 4: copy+mul+add+triad 5: copy+mul+add+triad+dot + dwords_per_lane: 4 # Number of dwords per lane + chunks_per_block: 4 # Number of chunks per block + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/gpup_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/gpup_single.conf new file mode 100644 index 0000000000..d0f9386f56 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/gpup_single.conf @@ -0,0 +1,174 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# Run test with testscript or binary: +# +# Using Testscript - +# cd /opt/rocm/share/rocm-validation-suite/testscripts +# sudo ./gpup.new.sh +# +# Using Binary - +# cd /opt/rocm/share/rocm-validation-suite/conf +# cd /opt/rocm/bin +# sudo ./rvs -c /opt/rocm/share/rocm-validation-suite/conf/gpup_single.conf +# +# Note: Paths may vary with the ROCm version or ROCm installation path. 
+ +# GPUP test #1 +# +# Preconditions: +# all AMD compatible GPUs +# all types of devices +# all gpu properties, all io_links properties +# +# Expected result: +# Test passes with displaying all properties values for any GPUs + +actions: +- name: RVS-GPUP-TC1 + device: all + module: gpup + properties: + all: + io_links-properties: + all: + +# GPUP test #2 +# +# Preconditions: +# all AMD compatible GPUs +# all types of devices +# no regular expressions +# only a subset of gpu properties, only a subset of io_link properties +# +# Expected result: +# Test passes with displaying subsets of properties and io_link properties values for any GPUs + +- name: RVS-GPUP-TC2 + device: all + module: gpup + properties: + simd_count: + mem_banks_count: + io_links_count: + vendor_id: + location_id: + max_engine_clk_ccompute: + io_links-properties: + version_major: + type: + version_major: + version_minor: + node_from: + node_to: + recommended_transfer_size: + flags: + +# GPUP test #3 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# all types of devices +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for subset of GPUs +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC3 + device: all + module: gpup + properties: + all: + io_links-properties: + all: + +# GPUP test #4 +# +# Preconditions: +# all AMD compatible GPUs +# a given device type (deviceid filtering), this must be filled based on deviceid in sysfs/ ./rvs -g. 
+# Default is 0=> no filtering +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for all GPUs and given deviceid + +- name: RVS-GPUP-TC4 + device: all + module: gpup + deviceid: 0 + properties: + all: + io_links-properties: + all: + +# GPUP test #5 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# a given device type (deviceid filtering) this must be filled based on deviceid in sysfs/ ./rvs -g +# Default is 0=> no filtering +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for subset of GPUs and given deviceid +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC5 + device: all + module: gpup + deviceid: 0 + properties: + all: + io_links-properties: + all: + +# GPUP test #6 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# a given device type (deviceid filtering) this must be filled based on deviceid in sysfs/ ./rvs -g +# Default is 0=> no filtering +# only a subset of gpu properties, only a subset of io_link properties +# +# Expected result: +# Test passes with displaying subset of properties and io_link properties values for subset of GPUs and given deviceid +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC6 + device: all + module: gpup + deviceid: 0 + properties: + mem_banks_count: + io_links-properties: + version_major: diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/gst_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/gst_single.conf new file mode 100644 index 0000000000..ba19402fb6 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/gst_single.conf @@ -0,0 +1,132 @@ +# 
################################################################################ +# # +# # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + + + +# GST test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space +# Set parallel execution to false +# Set matrix_size to 8640 (for Vega 10 cards). 
For Vega 20, the recommended matrix_size is 8640 +# Set run count to 2 (each test will run twice) +# Set copy_matrix to false (the matrices will be copied to GPUs only once) +# +# Run test with: +# cd bin +# sudo ./rvs -c conf/gst_1.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves 5000 gflops +# in maximum 7 seconds and then the GPU sustains the gflops +# for the rest of the test duration (total duration is 18 seconds). +# A single Gflops violation (with a 7% tolerance) is allowed. +# FALSE otherwise + +actions: +- name: gpustress-41000-fp32-false + device: all + module: gst + parallel: true + count: 1 + duration: 10000 + copy_matrix: false + target_stress: 41000 + matrix_size_a: 28000 + matrix_size_b: 28000 + matrix_size_c: 28000 + data_type: fp32_r + lda: 28000 + ldb: 28000 + ldc: 28000 + blas_source: hipblaslt + +- name: gpustress-30000-dgemm-false + device: all + module: gst + parallel: true + count: 1 + #hot_calls: 1000 + duration: 15000 + copy_matrix: false + target_stress: 30000 + matrix_size_a: 8192 + matrix_size_b: 8192 + matrix_size_c: 8192 + matrix_init: trig + ops_type: dgemm + lda: 8192 + ldb: 8192 + ldc: 8192 + +- name: gst-8096-150000-fp16 + device: all + module: gst + parallel: true + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + copy_matrix: false + target_stress: 150000 + matrix_size_a: 8096 + matrix_size_b: 8096 + matrix_size_c: 8096 + data_type: fp16_r + lda: 8096 + ldb: 8096 + ldc: 8096 + ldd: 8096 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + blas_source: hipblaslt + +- name: gst-160Tflops-8K8K8K-rand-i8 + device: all + module: gst + parallel: true + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 500 + copy_matrix: false + target_stress: 160000 + matrix_size_a: 8192 + matrix_size_b: 8192 + matrix_size_c: 8192 + matrix_init: rand + data_type: i8_r + lda: 8192 + ldb: 8192 + ldc: 8192 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + blas_source: hipblaslt + diff 
--git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/iet_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/iet_stress.conf new file mode 100644 index 0000000000..cdec29bc19 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/iet_stress.conf @@ -0,0 +1,146 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: action_1 + device: all + module: iet + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 700 + log_interval: 700 + max_violations: 1 + target_power: 300 + tolerance: 0.06 + matrix_size: 8640 + ops_type: dgemm + +- name: action_2 + device: all + module: iet + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 1500 + log_interval: 2000 + max_violations: 1 + target_power: 300 + tolerance: 0.2 + matrix_size: 8640 + ops_type: dgemm + +- name: action_3 + device: all + module: iet + parallel: false + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 500 + log_interval: 500 + max_violations: 1 + target_power: 300 + tolerance: 0.1 + matrix_size: 8640 + ops_type: dgemm + +# IET test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# Set parallel execution to true +# Set matrix_size to 8640 (for Vega 10 cards). For Vega 20, the recommended matrix_size is 8640 +# Set run count to 2 (each test will run twice) +# +# Run test with: +# cd bin +# sudo ./rvs -c conf/iet4.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches 150W +# in maximum 5 seconds and then the GPU sustains the same power +# for the rest of the test duration (total duration is 10 seconds). +# A single power violation (with a 10% tolerance) is allowed. 
+# FALSE otherwise + +- name: action_4 + device: all + module: iet + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 500 + log_interval: 500 + max_violations: 1 + target_power: 300 + tolerance: 0.1 + matrix_size: 8640 + ops_type: sgemm + +# IET test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# Set parallel execution to false +# Set matrix_size to 8640 (for Vega 10 cards). For Vega 20, the recommended matrix_size is 8640 +# Set run count to 2 (each test will run twice) +# +# Run test with: +# cd bin +# sudo ./rvs -c conf/iet5.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches 50W +# in maximum 5 seconds and then the GPU sustains the same power +# for the rest of the test duration (total duration is 10 seconds). +# A single power violation (with a 10% tolerance) is allowed. +# FALSE otherwise + +- name: action_5 + device: all + module: iet + parallel: false + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 1500 + log_interval: 2000 + max_violations: 1 + target_power: 300 + tolerance: 0.1 + matrix_size: 8640 + ops_type: sgemm + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/pbqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/pbqt_single.conf new file mode 100644 index 0000000000..d2f21e7f8a --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/pbqt_single.conf @@ -0,0 +1,182 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: action_1 + device: all + module: pbqt + log_interval: 800 + duration: 5000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + block_size: 1000000 2000000 10000000 + device_id: all + +- name: action_2 + device: all + module: pbqt + log_interval: 1000 + count: 3 + duration: 10000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_3 + device: all + module: pbqt + log_interval: 800 + duration: 4000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_4 + device: all + module: pbqt + log_interval: 1000 + duration: 5000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_5 + device: all + module: pbqt + log_interval: 800 + duration: 4000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_6 + device: all + module: pbqt + log_interval: 800 + duration: 8000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: false + parallel: false + device_id: all + +- name: action_7 + device: all + module: pbqt + peers: all + count: 1 + test_bandwidth: false + device_id: all + +- name: action_8 + device: all + module: pbqt + peers: all + test_bandwidth: true + bidirectional: true + parallel : true + device_id: all + +- name: action_9 + device: all + module: pbqt + log_interval: 500 + duration: 1000 + peers: all + test_bandwidth: true + bidirectional: false + parallel: true + device_id: all + +- name: action_10 + device: all + module: pbqt + log_interval: 500 + duration: 1000 + peers: all + peer_device_id: all + test_bandwidth: true + bidirectional: false + parallel: true + +- name: action_11 + device: all + module: pbqt + log_interval: 0 + duration: 10000 + peers: all + peer_device_id: all + 
test_bandwidth: true + bidirectional: true + parallel: false + device_id: all + +- name: action_12 + device: all + module: pbqt + log_interval: 0 + duration: 1000 + count: 3 + wait: 1000 + peers: all + peer_device_id: all + test_bandwidth: true + bidirectional: true + parallel: true + +- name: action_13 + device: all + module: pbqt + log_interval: 1000 + duration: 10000 + peers: all + device_id: all + peer_device_id: all + test_bandwidth: true + bidirectional: true + parallel: true + +- name: action_14 + device: all + module: pbqt + log_interval: 500 + duration: 10000 + peers: all + test_bandwidth: true + bidirectional: true + device_id: all diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/pebb_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/pebb_single.conf new file mode 100644 index 0000000000..787f286edc --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/pebb_single.conf @@ -0,0 +1,236 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# PEBB test #1 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test1.conf -d 3 +# + + +actions: +- name: h2d-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 50000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 2 # PCIe + + +# PEBB test #2 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. device to host +# +# Run test with : +# cd bin +# ./rvs -c conf/pebb_test2.conf -d 3 +# + + +- name: d2h-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 2 # PCIe + + + +# PEBB test #3 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test3.conf -d 3 +# + +- name: h2d-d2h-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 2 # PCIe + + + + +# PEBB test #4 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# 4. parallel transfers +# 5. 
random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test4.conf -d 3 + +- name: h2d-parallel-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 2 # PCIe + + +# PEBB test #5 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. device to host +# 4. parallel transfers +# 5. random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test5.conf -d 3 + +- name: d2h-parallel-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 2 # PCIe + + + +# PEBB test #6 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# 4. parallel transfers +# 5. random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test6.conf -d 3 + +- name: h2d-d2h-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 2 # PCIe + + +# PEBB test #7 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# 4. parallel transfers +# 5. back-to-back 51MB +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test7.conf -d 3 + +- name: h2d-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 34000 + device_to_host: false + host_to_device: true + b2b_block_size: 51200 + parallel: false + link_type: 2 # PCIe + + +# PEBB test #8 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host-to-device and device-to-host +# 4. parallel back-to-back transfers +# 5. 
back-to-back 51MB +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test8.conf -d 3 + +- name: d2h-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + b2b_block_size: 51200 + parallel: true + link_type: 2 # PCIe + +# PEBB test #9 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# 4. PCIe only +# 5. parallel back-to-back transfers +# Run test with: +# cd bin +# ./rvs -c conf/pebb_test9.conf -d 3 + +- name: h2d-d2h-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 34000 + device_to_host: true + host_to_device: true + b2b_block_size: 51200 + parallel: false + link_type: 2 # PCIe diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/tst_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/tst_single.conf new file mode 100644 index 0000000000..f7ae8db704 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI210/tst_single.conf @@ -0,0 +1,91 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. 
+# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# TST test +# +# Preconditions: +# Set device to all and execution as sequential. +# Workload set as dgemm operations with matrix size as 8640. +# Throttle temperature set as 100 degrees Celsius. +# +# Run test with: +# ./rvs -c conf/tst.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU junction temperature +# reaches the target temperature. Whether it reaches the throttle temperature +# during the test duration is also monitored. +# +actions: +- name: action_1 + device: all + device_index: all + module: tst + parallel: false + count: 1 + wait: 100 + duration: 30000 + ramp_interval: 10000 + sample_interval: 2000 + log_interval: 2000 + max_violations: 1 + throttle_temp: 100 + target_temp: 50 + tolerance: 0.06 + matrix_size: 8640 + ops_type: dgemm + +# TST test +# +# Preconditions: +# Set device to all and execution in parallel. +# Workload set as sgemm operations with matrix size as 8640. +# Throttle temperature set as 100 degrees Celsius. +# +# Run test with: +# ./rvs -c conf/tst.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU junction temperature +# reaches the target temperature. Whether it reaches the throttle temperature +# during the test duration is also monitored. 
+# +- name: action_2 + device: all + device_index: all + module: tst + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 700 + log_interval: 700 + target_temp: 50 + throttle_temp: 100 + tolerance: 0.06 + matrix_size: 8640 + ops_type: sgemm + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/iet_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/iet_stress.conf new file mode 100644 index 0000000000..4ce78ee1fa --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/iet_stress.conf @@ -0,0 +1,63 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +# IET stress test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by comma. +# Set parallel execution to true (gemm workload execution on all GPUs in parallel) +# Set gemm operation type as dgemm. +# Set matrix_size to 28000. +# Test duration set to 10 mins. +# Target power set to 550W for each GPU. +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/iet_stress.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves power target of 550W. +# + +actions: +- name: iet-stress-550W-dgemm-true + device: all + module: iet + parallel: true + duration: 60000 + ramp_interval: 10000 + sample_interval: 3000 + log_interval: 3000 + target_power: 550 + matrix_size: 28000 + ops_type: dgemm + lda: 28000 + ldb: 28000 + ldc: 28000 + alpha: 1 + beta: 1 + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/pebb_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/pebb_single.conf new file mode 100644 index 0000000000..cd27de6b85 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/pebb_single.conf @@ -0,0 +1,229 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# PEBB test #1 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +actions: +- name: h2d-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 50000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 4 # XGMI + + +# PEBB test #2 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. 
device to host +# +# Run test with : +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: d2h-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 4 # XGMI + + +# PEBB test #3 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: h2d-d2h-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 4 # XGMI + + +# PEBB test #4 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# 4. parallel transfers +# 5. random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: h2d-parallel-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 4 # XGMI + + +# PEBB test #5 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. device to host +# 4. parallel transfers +# 5. random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: d2h-parallel-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 4 # XGMI + + +# PEBB test #6 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# 4. parallel transfers +# 5. 
random block sizes +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: h2d-d2h-xMB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + link_type: 4 # XGMI + + +# PEBB test #7 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device +# 4. parallel transfers +# 5. back-to-back 51MB +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: h2d-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 34000 + device_to_host: false + host_to_device: true + b2b_block_size: 51200 + parallel: false + link_type: 4 # XGMI + + +# PEBB test #8 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host-to-device and device-to-host +# 4. parallel back-to-back transfers +# 5. back-to-back 51MB +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: d2h-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + b2b_block_size: 51200 + parallel: true + link_type: 4 # XGMI + + +# PEBB test #9 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional +# 4. XGMI only +# 5. 
parallel back-to-back transfers +# Run test with: +# cd bin +# ./rvs -c conf/MI300A/pebb_single.conf -d 3 +# +- name: h2d-d2h-b2b-51MB + device: all + module: pebb + log_interval: 800 + duration: 34000 + device_to_host: true + host_to_device: true + b2b_block_size: 51200 + parallel: false + link_type: 4 # XGMI + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/babel.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/babel.conf new file mode 100644 index 0000000000..22d2c6a4ba --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/babel.conf @@ -0,0 +1,49 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +# BABEL test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space (e.g.: device: 50599 3245) +# Set parallel execution to false +# Set buffer size to reflect the buffer you want to test +# Set run count to 1 (test will run once) +# + +actions: +- name: babel-float-256MiB + device: all + module: babel # Name of the module + parallel: false # Parallel true or false + count: 1 # Number of times you want to repeat the test from the beginning (a clean start every time) + num_iter: 5000 # Number of iterations, this many kernels are launched simultaneously and stress the system + array_size: 268435456 # Buffer size the test operates, this is 256 MiB + test_type: 1 # type of test, 1: Float, 2: Double, 3: Triad float, 4: Triad double + mibibytes: true # mibibytes (MiB) or megabytes (MB), true for MiB + o/p_csv: false # o/p as csv file + subtest: 5 # 1: copy 2: copy+mul 3: copy+mul+add 4: copy+mul+add+triad 5: copy+mul+add+triad+dot + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_ext.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_ext.conf new file mode 100644 index 0000000000..6be9c70274 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_ext.conf @@ -0,0 +1,94 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: gst-1000Tflops-8KB-fp8_r-false + device: all + module: gst + parallel: false + count: 1 + duration: 30000 + copy_matrix: false + target_stress: 1000000 + matrix_size_a: 8192 + matrix_size_b: 8192 + matrix_size_c: 8192 + data_type: fp8_r + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + +- name: gst-1000Tflops-8KB-fp8_r-true + device: all + module: gst + parallel: true + count: 1 + duration: 60000 + copy_matrix: false + target_stress: 1000000 + matrix_size_a: 8192 + matrix_size_b: 8192 + matrix_size_c: 8192 + data_type: fp8_r + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + +- name: gst-500Tflops-4KB-bf16_r-false + device: all + module: gst + parallel: false + count: 1 + duration: 30000 + copy_matrix: false + target_stress: 500000 + matrix_size_a: 4096 + matrix_size_b: 4096 + matrix_size_c: 8192 + data_type: bf16_r + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + +- name: gst-500Tflops-4KB-bf16_r-true + device: all + module: gst + parallel: true + count: 1 + duration: 60000 + copy_matrix: false + target_stress: 500000 + matrix_size_a: 4096 + matrix_size_b: 4096 + matrix_size_c: 8192 + data_type: bf16_r + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_selfcheck.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_selfcheck.conf new file mode 100644 index 0000000000..a7a417adf4 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_selfcheck.conf @@ -0,0 +1,181 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + + +# GST self-check & accuracy-check test - gst-3K-sgemm-check +# +# Preconditions: +# Set device to all. 
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space +# Set matrices sizes to 3072 * 3072 * 3072 +# Set gemm operation as sgemm +# Set matrix data initialization method as random integer +# Set copy_matrix to false (the matrices will be copied to GPUs only once) +# Set target stress GFLOPS as 100000 (100 TFLOPS) +# Set self-check gemm self checking as enabled +# Set accu-check gemm accuracy checking as enabled (applicable for sgemm & dgemm only) +# Set error-inject gemm error injection as enabled (For TEST purpose only) +# Set error-freq error injection frequency as 2 (For TEST purpose only) +# Set error-count error injection count as 1 (For TEST purpose only) +# +# Expected result: +# Report self-check and accu-error at regular intervals as per set error parameters. +# The test on each GPU passes (TRUE) if the GPU achieves 100 TFLOPS or more +# within the test duration of 2 mins after ramp-up duration of 5 seconds. +# Else test on the GPU fails (FALSE).
+actions: +- name: gst-3K-sgemm-check + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 120000 + hot_calls: 1 + copy_matrix: false + target_stress: 100000 + matrix_size_a: 3072 + matrix_size_b: 3072 + matrix_size_c: 3072 + matrix_init: rand + ops_type: sgemm + lda: 3072 + ldb: 3072 + ldc: 3072 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + self_check: true + accuracy_check: true + error_inject: true + error_freq: 2 + error_count: 1 + +- name: gst-3K-dgemm-check + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 120000 + hot_calls: 1 + copy_matrix: false + target_stress: 85000 + matrix_size_a: 3072 + matrix_size_b: 3072 + matrix_size_c: 3072 + matrix_init: rand + ops_type: dgemm + lda: 3072 + ldb: 3072 + ldc: 3072 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + self_check: true + accuracy_check: true + error_inject: true + error_freq: 2 + error_count: 1 + +- name: gst-3K-fp16-check + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 30000 + hot_calls: 1 + copy_matrix: false + target_stress: 150000 + matrix_size_a: 3072 + matrix_size_b: 3072 + matrix_size_c: 3072 + matrix_init: rand + data_type: fp16_r + lda: 3072 + ldb: 3072 + ldc: 3072 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + self_check: true + error_inject: true + error_freq: 2 + error_count: 1 + +- name: gst-3K-bf16-check + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 30000 + hot_calls: 1 + copy_matrix: false + target_stress: 250000 + matrix_size_a: 3072 + matrix_size_b: 3072 + matrix_size_c: 3072 + matrix_init: rand + data_type: bf16_r + lda: 3072 + ldb: 3072 + ldc: 3072 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + self_check: true + error_inject: true + error_freq: 2 + error_count: 1 + +- name: gst-3K-fp8-check + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 30000 + hot_calls: 1 + copy_matrix: false + target_stress: 300000 + matrix_size_a: 
3072 + matrix_size_b: 3072 + matrix_size_c: 3072 + matrix_init: rand + data_type: fp8_r + lda: 3072 + ldb: 3072 + ldc: 3072 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + self_check: true + error_inject: true + error_freq: 2 + error_count: 1 + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_single.conf new file mode 100644 index 0000000000..61f7ae17ac --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_single.conf @@ -0,0 +1,186 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# GST test - gst-1215Tflops-4K4K8K-rand-fp8 +# +# Preconditions: +# Set device to all. 
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space +# Set matrices sizes to 4864 * 4096 * 8192 +# Set matrix data type as fp8 real number +# Set matrix data initialization method as random integer +# Set copy_matrix to false (the matrices will be copied to GPUs only once) +# Set target stress GFLOPS as 1215000 (1215 TFLOPS) +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves 1215 TFLOPS or more +# within the test duration of 15 seconds after ramp-up duration of 5 seconds. +# Else test on the GPU fails (FALSE). + +actions: +- name: gst-1215Tflops-4K4K8K-rand-fp8 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 1000 + copy_matrix: false + target_stress: 1215000 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: rand + data_type: fp8_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-981Tflops-4K4K8K-trig-fp8 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 1000 + copy_matrix: false + target_stress: 981000 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: trig + data_type: fp8_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-639Tflops-4K4K8K-rand-fp16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 1000 + copy_matrix: false + target_stress: 639000 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: rand + data_type: fp16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-523Tflops-4K4K8K-trig-fp16 + 
device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 1000 + copy_matrix: false + target_stress: 523000 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: trig + data_type: fp16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-581Tflops-4K4K8K-rand-bf16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 1000 + copy_matrix: false + target_stress: 581000 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: rand + data_type: bf16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-552Tflops-4K4K8K-trig-bf16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 1000 + copy_matrix: false + target_stress: 552000 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: trig + data_type: bf16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_stress.conf new file mode 100644 index 0000000000..a205e2ccfe --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_stress.conf @@ -0,0 +1,63 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# GST test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space +# Set parallel execution to true (workload execution on all GPUs in parallel) +# Set matrix_size to 28000. 
+# Set run count to 1 (each test will run once) +# Set copy_matrix to false (the matrices will be copied to GPUs only once) +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/gst_stress.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves 50000 gflops + +actions: +- name: gpustress-50000-dgemm-true + device: all + module: gst + parallel: true + count: 1 + duration: 60000 + copy_matrix: false + target_stress: 50000 + matrix_size_a: 28000 + matrix_size_b: 28000 + matrix_size_c: 28000 + ops_type: dgemm + lda: 28000 + ldb: 28000 + ldc: 28000 + alpha: 1 + beta: 1 + matrix_init: hiprand + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/iet_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/iet_single.conf new file mode 100644 index 0000000000..fe98601aa7 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/iet_single.conf @@ -0,0 +1,118 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# Test #1 - iet-400W-1K-rand-dgemm +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# Set parallel execution to true +# Set matrix_size to 1024 for dgemm operations +# Set target power to 400 Watts +# Set test duration to 1 min +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/iet_single.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches at least 400 Watts, +# FALSE otherwise + +actions: +- name: iet-400W-1K-rand-dgemm + device: all + module: iet + parallel: true + duration: 60000 + sample_interval: 3000 + target_power: 400 + matrix_size: 1024 + matrix_init: rand + ops_type: dgemm + +# Test #2 - iet-wait-750W-28K-rand-dgemm +# +# Preconditions: +# Set device to all. 
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify those IDs +# Set parallel execution to true +# Set matrix_size to 28000 for dgemm operations +# Set target power to 750 Watts +# Set wait duration to 30 seconds (GPU idle period) +# Set test duration to 1 min +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/iet_single.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches at least 750 Watts, +# FALSE otherwise + +- name: iet-wait-750W-28K-rand-dgemm + device: all + module: iet + parallel: true + wait: 30000 # Wait for 30 secs before the test starts + duration: 60000 + sample_interval: 3000 + target_power: 750 + matrix_size: 28000 + matrix_init: hiprand + ops_type: dgemm + +# Test #3 - iet-wait-400W-1K-rand-dgemm +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify those IDs +# Set parallel execution to true +# Set matrix_size to 1024 for dgemm operations +# Set target power to 400 Watts +# Set wait duration to 30 seconds (GPU idle period) +# Set test duration to 1 min +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/iet_single.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches at least 400 Watts, +# FALSE otherwise + +- name: iet-wait-400W-1K-rand-dgemm + device: all + module: iet + parallel: true + wait: 30000 # Wait for 30 secs before the test starts + duration: 60000 + sample_interval: 3000 + log_interval: 3000 + target_power: 400 + matrix_size: 1024 + matrix_init: rand + ops_type: dgemm + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/iet_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/iet_stress.conf new file mode 100644 index 0000000000..9e52d42c32 --- /dev/null +++
b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/iet_stress.conf @@ -0,0 +1,64 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# IET stress test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space. +# Set parallel execution to true (gemm workload execution on all GPUs in parallel) +# Set gemm operation type as dgemm. +# Set matrix_size to 28000. +# Test duration set to 1 min. +# Target power set to 750W for each GPU.
+# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/iet_stress.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves power target of 750W. +# + +actions: +- name: iet-stress-750W-dgemm-true + device: all + module: iet + parallel: true + duration: 60000 + ramp_interval: 10000 + sample_interval: 5000 + log_interval: 5000 + target_power: 750 + matrix_size: 28000 + ops_type: dgemm + lda: 28000 + ldb: 28000 + ldc: 28000 + alpha: 1 + beta: 1 + matrix_init: hiprand + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/pbqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/pbqt_single.conf new file mode 100644 index 0000000000..6df30137f8 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/pbqt_single.conf @@ -0,0 +1,98 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +actions: +- name: p2p-unidir-sequential-64MB + device: all + module: pbqt + log_interval: 5000 + duration: 60000 + peers: all + test_bandwidth: true + bidirectional: false + parallel: false + block_size: 67108864 + device_id: all + +- name: p2p-unidir-parallel-64MB + device: all + module: pbqt + log_interval: 5000 + duration: 60000 + peers: all + test_bandwidth: true + bidirectional: false + parallel: true + block_size: 67108864 + device_id: all + +- name: p2p-bidir-sequential-64MB + device: all + module: pbqt + log_interval: 5000 + duration: 60000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: false + block_size: 67108864 + device_id: all + +- name: p2p-bidir-parallel-64MB + device: all + module: pbqt + log_interval: 5000 + duration: 60000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + block_size: 67108864 + device_id: all + +- name: p2p-bidir-sequential-64-128-256MB + device: all + module: pbqt + log_interval: 5000 + duration: 60000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: false + block_size: 67108864 134217728 268435456 + device_id: all + +- name: p2p-bidir-parallel-64-128-256MB + device: all + module: pbqt + log_interval: 5000 + duration: 60000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + block_size: 67108864 134217728 268435456 + device_id: all + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/pebb_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/pebb_single.conf new file mode 100644 index 0000000000..6c2be740f3 --- /dev/null +++ 
b/rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/pebb_single.conf @@ -0,0 +1,95 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# PEBB test #1 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. device to host +# 4. Transfer block size 64MB +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/pebb_single.conf +# +actions: +- name: d2h-sequential-64MB + device: all + module: pebb + duration: 60000 + device_to_host: true + host_to_device: false + parallel: false + block_size: 67108864 + link_type: 2 # PCIe + +# PEBB test #2 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. 
host to device +# 4. Transfer block size 64MB +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/pebb_single.conf +# +- name: h2d-sequential-64MB + device: all + module: pebb + duration: 60000 + device_to_host: false + host_to_device: true + parallel: false + block_size: 67108864 + link_type: 2 # PCIe + +# PEBB test #3 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. host to device & device to host +# 4. Transfer block size 64MB +# +# Run test with: +# cd bin +# ./rvs -c conf/MI300X/pebb_single.conf +# +- name: h2d-d2h-sequential-64MB + device: all + module: pebb + duration: 60000 + device_to_host: true + host_to_device: true + parallel: false + block_size: 67108864 + link_type: 2 # PCIe + warm_calls: 10 + hot_calls: 100 + b2b: true + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/babel.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/babel.conf new file mode 100644 index 0000000000..15b9dd98d6 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/babel.conf @@ -0,0 +1,51 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software.
+# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# BABEL test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space (e.g.: device: 50599 3245) +# Set parallel execution to false +# Set buffer size to reflect the buffer you want to test +# Set run count to 1 (test will run once) +# + +actions: +- name: babel-float-256MiB + device: all + module: babel # Name of the module + parallel: false # Parallel true or false + count: 1 # Number of times you want to repeat the test from the beginning (a clean start every time) + num_iter: 5000 # Number of iterations, this many kernels are launched simultaneously and stress the system + array_size: 268435456 # Buffer size the test operates on, this is 256 MiB + test_type: 1 # type of test, 1: Float, 2: Double, 3: Triad float, 4: Triad double + mibibytes: true # mebibytes (MiB) or megabytes (MB), true for MiB + o/p_csv: false # o/p as csv file + subtest: 5 # 1: copy 2: copy+mul 3: copy+mul+add 4: copy+mul+add+triad 5: copy+mul+add+triad+dot + dwords_per_lane: 4 # Number of dwords per lane + chunks_per_block: 4 # Number of chunks per block + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/gst_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/gst_single.conf new file mode 100644 index
0000000000..799164793d --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/gst_single.conf @@ -0,0 +1,256 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# GST test - gst-96Tflops-8K12K4K-trig-tf32 +# +# Preconditions: +# Set device to all. 
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space +# Set matrices sizes to 8192 * 12288 * 4096 +# Set matrix data type as fp32 real number +# Set compute type as tf32 (xf32) +# Set matrix data initialization method as trigonometric float +# Set copy_matrix to false (the matrices will be copied to GPUs only once) +# Set target stress as 96000 GFLOPS (96 TFLOPS) +# Set blas source (backend) as hipblaslt +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves 96 TFLOPS or more +# within the test duration of 15 seconds after ramp-up duration of 5 seconds. +# Else test on the GPU fails (FALSE). + +actions: +- name: gst-96Tflops-8K12K4K-trig-tf32 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 50 + copy_matrix: false + target_stress: 96000 + matrix_size_a: 8192 + matrix_size_b: 12288 + matrix_size_c: 4096 + matrix_init: trig + data_type: fp32_r + compute_type: xf32_r + transa: 0 + transb: 0 + alpha: 1 + beta: 1 + blas_source: hipblaslt + parallel: true + +- name: gst-406Tflops-8K13K17K-trig-i8 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 500 + copy_matrix: false + target_stress: 406000 + matrix_size_a: 8192 + matrix_size_b: 13312 + matrix_size_c: 17792 + matrix_init: trig + data_type: i8_r + compute_type: i32_r + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + blas_source: hipblaslt + parallel: true + +- name: gst-26Tflops-8K8K8K-trig-fp32 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 100 + copy_matrix: false + target_stress: 26000 + matrix_size_a: 8192 + matrix_size_b: 8960 + matrix_size_c: 8192 + matrix_init: trig + data_type: fp32_r + compute_type: fp32_r + transa: 0 + transb: 0 + alpha: 1 + beta: 1 + blas_source: hipblaslt +
parallel: true + + +- name: gst-343Tflops-4K4K8K-rand-fp8 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 3000 + copy_matrix: false + target_stress: 343415 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: rand + data_type: fp8_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-336Tflops-4K4K8K-trig-fp8 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 170000 + copy_matrix: false + target_stress: 336441 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: trig + data_type: fp8_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-176Tflops-4K4K8K-rand-fp16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 3000 + copy_matrix: false + target_stress: 176191 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: rand + data_type: fp16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-172Tflops-4K4K8K-trig-fp16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 90000 + copy_matrix: false + target_stress: 172333 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: trig + data_type: fp16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-174Tflops-4K4K8K-rand-bf16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 3000 + copy_matrix: false + target_stress: 174364 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: rand + data_type: bf16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 
4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + +- name: gst-172Tflops-4K4K8K-trig-bf16 + device: all + module: gst + log_interval: 3000 + ramp_interval: 5000 + duration: 15000 + hot_calls: 90000 + copy_matrix: false + target_stress: 172333 + matrix_size_a: 4864 + matrix_size_b: 4096 + matrix_size_c: 8192 + matrix_init: trig + data_type: bf16_r + lda: 8320 + ldb: 8320 + ldc: 4992 + ldd: 4992 + transa: 1 + transb: 0 + alpha: 1 + beta: 0 + parallel: true + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/gst_thermal.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/gst_thermal.conf new file mode 100644 index 0000000000..b95cf6d705 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/gst_thermal.conf @@ -0,0 +1,70 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# GST thermal test - gst-thermal-dgemm-true +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space +# Set matrices sizes to 8640 * 8640 * 8640 +# Set matrices batch size to 96 +# Set gemm operation type as dgemm real +# Set gemm operation mode as batched strided gemm +# Set matrix data initialization method as hip random integer +# Set copy_matrix to false (the matrices will be copied to GPUs only once) +# Set target stress GFLOPS as 24700 GFLOPS (~24.7 TFLOPS) +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves 24.7 TFLOPS or more +# within the test duration of 1 min after ramp-up duration of 10 seconds. +# Else test on the GPU fails (FALSE).
+ +actions: +- name: gst-thermal-dgemm-true + device: all + module: gst + parallel: true + log_interval: 5000 + ramp_interval: 10000 + duration: 60000 + copy_matrix: false + target_stress: 24700 + matrix_size_a: 8640 + matrix_size_b: 8640 + matrix_size_c: 8640 + lda: 8640 + ldb: 8640 + ldc: 8640 + ldd: 8640 + gemm_mode: strided_batched + batch_size: 96 + matrix_init: hiprand + ops_type: dgemm + transa: 0 + transb: 1 + alpha: 2.71828 + beta: 3.14159 + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_single.conf new file mode 100644 index 0000000000..ddc900b2c2 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_single.conf @@ -0,0 +1,110 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# Test #1 - iet-260W-1K-rand-dgemm +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify those GPU IDs for device +# Set parallel execution to true +# Set matrix_size to 1024 for dgemm operations +# Set target power to 260 Watts +# Set test duration to 1 min +# +# Run test with: +# cd bin +# ./rvs -c conf/MI308X/iet_single.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches at least 260 Watts, +# FALSE otherwise + +actions: +- name: iet-260W-1K-rand-dgemm + device: all + module: iet + parallel: true + duration: 60000 + sample_interval: 1000 + target_power: 260 + matrix_size: 1024 + matrix_init: rand + ops_type: dgemm + +# Test #2 - iet-wait-350W-8K-rand-dgemm +# +# Preconditions: +# Set device to all.
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify those GPU IDs for device +# Set parallel execution to true +# Set matrix_size to 8096 for dgemm operations +# Set target power to 350 Watts +# Set wait duration to 30 seconds (GPU idle period) +# Set test duration to 1 min +# +# Run test with: +# cd bin +# ./rvs -c conf/MI308X/iet_single.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU power reaches at least 350 Watts, +# FALSE otherwise + +- name: iet-wait-350W-8K-rand-dgemm + device: all + module: iet + parallel: true + wait: 30000 # Wait for 30 secs before the test starts + duration: 60000 + sample_interval: 1000 + target_power: 350 + matrix_size: 8096 + matrix_init: rand + ops_type: dgemm + +- name: iet-wait-420W-16K-rand-dgemm + device: all + module: iet + parallel: true + wait: 30000 # Wait for 30 secs before the test starts + duration: 60000 + sample_interval: 1000 + target_power: 420 + matrix_size: 16182 + matrix_init: rand + ops_type: dgemm + +- name: iet-wait-stress-650W-bw + device: all + module: iet + parallel: true + wait: 30000 # Wait for 30 secs before the test starts + duration: 60000 + sample_interval: 1000 + target_power: 650 + bw_workload: true + cp_workload: false + tolerance: 0.05 + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_stress.conf new file mode 100644 index 0000000000..ba6662ca1f --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_stress.conf @@ -0,0 +1,58 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# IET stress test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by comma. +# Set parallel execution to true (gemm workload execution on all GPUs in parallel) +# Test duration set to 1 min. +# Target power set to 650W for each GPU. +# Tolerance set to 5% of target power. +# +# Run test with: +# cd bin +# ./rvs -c conf/MI308X/iet_stress.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves power target of 650W.
+# + +actions: +- name: iet-stress-650W-true + device: all + module: iet + parallel: true + duration: 60000 + ramp_interval: 1000 + sample_interval: 5000 + log_interval: 5000 + target_power: 650 + tolerance: 0.05 + bw_workload: true + cp_workload: false + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_thermal.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_thermal.conf new file mode 100644 index 0000000000..ff4cbc00f1 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_thermal.conf @@ -0,0 +1,69 @@ +# ################################################################################ +# # +# # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# IET thermal test +# +# Preconditions: +# Set device to all. 
If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by comma. +# Set parallel execution to true (gemm workload execution on all GPUs in parallel) +# Test duration set to 1 min. +# Target power set to 600W for each GPU. +# +# Run test with: +# cd bin +# ./rvs -c conf/MI308X/iet_thermal.conf +# +# Expected result: +# The test on each GPU passes (TRUE) if the GPU achieves power target of 600W. +# + +actions: +- name: iet-thermal-dgemm-true + device: all + module: iet + parallel: true + sample_interval: 5000 + ramp_interval: 20000 + duration: 60000 + copy_matrix: false + target_power: 600 + matrix_size_a: 8640 + matrix_size_b: 8640 + matrix_size_c: 8640 + lda: 8640 + ldb: 8640 + ldc: 8640 + ldd: 8640 + gemm_mode: strided_batched + batch_size: 96 + matrix_init: hiprand + ops_type: dgemm + transa: 0 + transb: 1 + alpha: 2.71828 + beta: 3.14159 + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/default b/rdc_libs/rdc_modules/rdc_rvs/conf/default new file mode 120000 index 0000000000..300b804331 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/default @@ -0,0 +1 @@ +nv21 \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gpup_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gpup_single.conf new file mode 120000 index 0000000000..1d8580ce60 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gpup_single.conf @@ -0,0 +1 @@ +../nv31/gpup_single.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_single.conf new file mode 100644 index 0000000000..bc059d7405 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_single.conf @@ -0,0 +1,41 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc.
All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: gpustress-9000-sgemm-false + device: all + module: gst + parallel: true + count: 1 + duration: 10000 + copy_matrix: false + target_stress: 6000 + matrix_size_a: 8640 + matrix_size_b: 8640 + matrix_size_c: 8640 + ops_type: sgemm + lda: 8640 + ldb: 8640 + ldc: 8640 diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_stress_3_hrs.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_stress_3_hrs.conf new file mode 120000 index 0000000000..45d1697def --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_stress_3_hrs.conf @@ -0,0 +1 @@ +../nv31/gst_stress_3_hrs.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/iet_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/iet_stress.conf new file mode 120000 index 0000000000..db60cd5cc0 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/iet_stress.conf @@ -0,0 +1 @@ +../nv31/iet_stress.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/mem.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/mem.conf new file mode 120000 index 0000000000..f9b78f3b59 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/mem.conf @@ -0,0 +1 @@ +../nv31/mem.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pbqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pbqt_single.conf new file mode 120000 index 0000000000..b113e10ae6 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pbqt_single.conf @@ -0,0 +1 @@ +../nv31/pbqt_single.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pebb_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pebb_single.conf new file mode 120000 index 0000000000..e0dc0ba73d --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pebb_single.conf @@ -0,0 +1 @@ +../nv31/pebb_single.conf \ No newline at end of file diff --git 
a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/peqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/peqt_single.conf new file mode 120000 index 0000000000..8a87ad9b3a --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/peqt_single.conf @@ -0,0 +1 @@ +../nv31/peqt_single.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pesm_1.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pesm_1.conf new file mode 120000 index 0000000000..4066c37636 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pesm_1.conf @@ -0,0 +1 @@ +../nv31/pesm_1.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/rcqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/rcqt_single.conf new file mode 120000 index 0000000000..9795221fef --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv21/rcqt_single.conf @@ -0,0 +1 @@ +../nv31/rcqt_single.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gpup_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gpup_single.conf new file mode 100644 index 0000000000..9d1e637c66 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gpup_single.conf @@ -0,0 +1,174 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. 
+# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# Run test with testscript or binary: +# +# Using Testscript - +# cd /opt/rocm/share/rocm-validation-suite/testscripts +# sudo ./gpup.new.sh +# +# Using Binary - +# cd /opt/rocm/share/rocm-validation-suite/conf +# cd /opt/rocm/bin +# sudo ./rvs -c /opt/rocm/share/rocm-validation-suite/conf/gpup_single.conf +# +# Note: Paths may vary with the ROCm version or ROCm installation path. + +# GPUP test #1 +# +# Preconditions: +# all AMD compatible GPUs +# all types of devices +# all gpu properties, all io_links properties +# +# Expected result: +# Test passes with displaying all properties values for any GPUs + +actions: +- name: RVS-GPUP-TC1 + device: all + module: gpup + properties: + all: + io_links-properties: + all: + +# GPUP test #2 +# +# Preconditions: +# all AMD compatible GPUs +# all types of devices +# no regular expressions +# only a subset of gpu properties, only a subset of io_link properties +# +# Expected result: +# Test passes with displaying subsets of properties and io_link properties values for any GPUs + +- name: RVS-GPUP-TC2 + device: all + module: gpup + properties: + simd_count: + mem_banks_count: + io_links_count: + vendor_id: + location_id: + max_engine_clk_ccompute: + io_links-properties: + version_major: + type: + version_major: + version_minor: + node_from: + node_to: + recommended_transfer_size: + flags: + +# GPUP test #3 +# +# Preconditions: +# only a subset of 
AMD compatible GPUs (device filtering) +# all types of devices +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for subset of GPUs +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC3 + device: all + module: gpup + properties: + all: + io_links-properties: + all: + +# GPUP test #4 +# +# Preconditions: +# all AMD compatible GPUs +# a given device type (deviceid filtering), this must be filled based on deviceid in sysfs/ ./rvs -g. +# Default is 0=> no filtering +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for all GPUs and given deviceid + +- name: RVS-GPUP-TC4 + device: all + module: gpup + deviceid: 0 + properties: + all: + io_links-properties: + all: + +# GPUP test #5 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# a given device type (deviceid filtering) this must be filled based on deviceid in sysfs/ ./rvs -g +# Default is 0=> no filtering +# all gpu properties, all io_link properties +# +# Expected result: +# Test passes with displaying all properties and io_link properties values for subset of GPUs and given deviceid +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC5 + device: all + module: gpup + deviceid: 0 + properties: + all: + io_links-properties: + all: + +# GPUP test #6 +# +# Preconditions: +# only a subset of AMD compatible GPUs (device filtering) +# a given device type (deviceid filtering) this must be filled based on deviceid in sysfs/ ./rvs -g +# Default is 0=> no filtering +# only a subset of gpu properties, only a subset of io_link properties +# +# Expected result: +# Test passes with displaying subset of properties and io_link properties values for 
subset of GPUs and given deviceid +# +# Note: +# Testing specific device, if device numbers are changed in system it should be changed in the test + +- name: RVS-GPUP-TC6 + device: all + module: gpup + deviceid: 0 + properties: + mem_banks_count: + io_links-properties: + version_major: diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_single.conf new file mode 100644 index 0000000000..af70c801dc --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_single.conf @@ -0,0 +1,41 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: gpustress-10000-sgemm-false + device: all + module: gst + parallel: true + count: 1 + duration: 10000 + copy_matrix: false + target_stress: 10000 + matrix_size_a: 8640 + matrix_size_b: 8640 + matrix_size_c: 8640 + ops_type: sgemm + lda: 8640 + ldb: 8640 + ldc: 8640 diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_stress_3_hrs.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_stress_3_hrs.conf new file mode 100644 index 0000000000..6bb341be20 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_stress_3_hrs.conf @@ -0,0 +1,43 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: gpustress-3hrs + device: all + module: gst + parallel: true + count: 1 + duration: 10800000 + ramp_interval: 300000 + log_interval: 6000 + target_stress: 5000 + max_violations: 1 + copy_matrix: false + tolerance: 0.01 + matrix_size_a: 8640 + matrix_size_b: 8640 + matrix_size_c: 8640 + ops_type: sgemm + diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/iet_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/iet_stress.conf new file mode 100644 index 0000000000..f5b0d9601d --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/iet_stress.conf @@ -0,0 +1,41 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: action_1 + device: all + module: iet + parallel: true + count: 1 + wait: 100 + duration: 50000 + ramp_interval: 5000 + sample_interval: 700 + log_interval: 700 + max_violations: 1 + target_power: 127 + tolerance: 0.06 + matrix_size: 8640 + ops_type: dgemm diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/mem.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/mem.conf new file mode 100644 index 0000000000..9901c2b63f --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/mem.conf @@ -0,0 +1,68 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +# Memory test +# +# Preconditions: +# Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g +# option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify +# all the GPUs IDs separated by white space (e.g.: device: 50599 3245) +# Set run count to how many times we want each test to run +# +# Run test with: +# ./rvs -c conf/mem.conf -d 3 +# +# Expected result: +# The test on each GPU passes (TRUE) if no memory errors are seen +# FALSE otherwise +# +# To omit individual actions specify number of test in exclude tag's value, numbers as specified below +# 0: Walking 1 bit +# 1: Own address test +# 2: Moving inversions, ones&zeros +# 3: Moving inversions, 8 bit pattern +# 4: Moving inversions, random pattern +# 5: Block move, 64 moves +# 6: Moving inversions, 32 bit pattern +# 7: Random number sequence +# 8: Modulo 20, random pattern +# 9: Bit fade test +# 10: Memory stress test +# + +actions: +- name: action_1 + device: all + module: mem + parallel: true + count: 1 + wait: 100 + mapped_memory: false + mem_blocks: 128 + num_passes: 500 + thrds_per_blk: 64 + stress: true + num_iter: 50000 + exclude : 9 10 diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pbqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pbqt_single.conf new file mode 100644 index 0000000000..8446ad8f6b --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pbqt_single.conf @@ -0,0 +1,182 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: action_1 + device: all + module: pbqt + log_interval: 800 + duration: 5000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + block_size: 1000000 2000000 10000000 + device_id: all + +- name: action_2 + device: all + module: pbqt + log_interval: 1000 + count: 3 + duration: 10000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_3 + device: all + module: pbqt + log_interval: 800 + duration: 4000 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_4 + device: all + module: pbqt + log_interval: 1000 + duration: 5000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_5 + device: all + module: pbqt + log_interval: 800 + duration: 4000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: true + parallel: true + device_id: all + +- name: action_6 + device: all + module: pbqt + log_interval: 800 + duration: 8000 + count: 1 + peers: all + test_bandwidth: true + bidirectional: false + parallel: false + device_id: all + +- name: action_7 + device: all + module: pbqt + peers: all + count: 1 + test_bandwidth: false + device_id: all + +- name: action_8 + device: all + module: pbqt + peers: all + test_bandwidth: true + bidirectional: true + parallel : true + device_id: all + +- name: action_9 + device: all + module: pbqt + log_interval: 500 + duration: 1000 + peers: all + test_bandwidth: true + bidirectional: false + parallel: true + device_id: all + +- name: action_10 + device: all + module: pbqt + log_interval: 500 + duration: 1000 + peers: all + peer_device_id: all + test_bandwidth: true + bidirectional: false + parallel: true + +- name: action_11 + device: all + module: pbqt + log_interval: 0 + duration: 10000 + peers: all + peer_device_id: all + 
test_bandwidth: true + bidirectional: true + parallel: false + device_id: all + +- name: action_12 + device: all + module: pbqt + log_interval: 0 + duration: 1000 + count: 3 + wait: 1000 + peers: all + peer_device_id: all + test_bandwidth: true + bidirectional: true + parallel: true + +- name: action_13 + device: all + module: pbqt + log_interval: 1000 + duration: 10000 + peers: all + device_id: all + peer_device_id: all + test_bandwidth: true + bidirectional: true + parallel: true + +- name: action_14 + device: all + module: pbqt + log_interval: 500 + duration: 10000 + peers: all + test_bandwidth: true + bidirectional: true + device_id: all diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pebb_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pebb_single.conf new file mode 100644 index 0000000000..5943132ff9 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pebb_single.conf @@ -0,0 +1,43 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# PEBB test #3 +# +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. bidirectional + +actions: +- name: h2d-d2h-sequential-51MB + device: all + module: pebb + log_interval: 800 + duration: 5000 + device_to_host: true + host_to_device: true + parallel: true + block_size: 51200000 + link_type: 2 # PCIe diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/peqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/peqt_single.conf new file mode 100644 index 0000000000..bffae5d54b --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/peqt_single.conf @@ -0,0 +1,593 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +# Run test with testscript or binary: +# +# Using Testscript - +# cd /opt/rocm/share/rocm-validation-suite/testscripts +# sudo ./peqt.new.sh +# +# Using Binary - +# cd /opt/rocm/share/rocm-validation-suite/conf +# cd /opt/rocm/bin +# ./rvs -c /opt/rocm/share/rocm-validation-suite/conf/peqt_single.conf +# +# Note: Paths may vary with the ROCm version or ROCm installation path. + +# PEQT test #1 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. no regular expressions +# 4. all PCIe capabilities +# Expected PCIe check RESULT = TRUE if at least one AMD compatible GPU is registered within the system, FALSE otherwise + + +actions: +- name: pcie_act_1 + device: all + module: peqt + capability: + link_cap_max_speed: + link_cap_max_width: + link_stat_cur_speed: + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #2 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 2. all types of devices +# 3. no regular expressions +# 4. 
all PCIe capabilities +# Expected PCIe check RESULT = TRUE if at least one of the AMD compatible GPUs (registered within the system) matches one of the GPU ID in the list, FALSE otherwise + + +- name: pcie_act_2 + module: peqt + device: all + capability: + link_cap_max_speed: + link_cap_max_width: + link_stat_cur_speed: + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + atomic_op_128_CAS_completer: + +# PEQT test #3 +# testing conditions: +# 1. all AMD compatible GPUs +# 3. no regular expressions +# 4. all PCIe capabilities +# Expected PCIe check RESULT = TRUE if at least one AMD compatible GPU (registered within the system), FALSE otherwise + + +- name: pcie_act_3 + module: peqt + device: all + capability: + link_cap_max_speed: + link_cap_max_width: + link_stat_cur_speed: + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #4 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 3. no regular expressions +# 4. 
all PCIe capabilities +# Expected PCIe check RESULT = TRUE if at least one of the AMD compatible GPUs (registered within the system) matches one of the GPU IDs in the list +# , FALSE otherwise + + +- name: pcie_act_4 + module: peqt + device: all + capability: + link_cap_max_speed: + link_cap_max_width: + link_stat_cur_speed: + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #5 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 2. a given device type (deviceid filtering) (replace 0 with the appropriate deviceid) +# 3. no regular expressions +# 4. only a subset of PCIe capabilities +# Expected PCIe check RESULT = TRUE if at least one of the AMD compatible GPUs (registered within the system) matches one of the GPU IDs in the list +# and also matches the given deviceid, FALSE otherwise + + +- name: pcie_act_5 + module: peqt + device: all + deviceid: 0 + capability: + link_cap_max_speed: + link_cap_max_width: + link_stat_cur_speed: + link_stat_neg_width: + dev_serial_num: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #6 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. a simple regular expression for the link_cap_max_speed capability +# 4. 
all PCIe capabilities +# Expected PCIe check RESULT = TRUE if all values match the given regular expression +# and at least one AMD compatible GPU is registered within the system +# FALSE otherwise + +- name: pcie_act_6 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: + link_stat_cur_speed: + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #7 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. 2 simple regular expressions, as follows: one for capability and another one for the +# 4. all PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - at least one AMD compatible GPU is registered within the system and +# - all values match the given regular expression and +# - all values match the given regular expression +# FALSE otherwise + + +- name: pcie_act_7 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: +# PEQT test #8 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. 3 simple regular expressions, as follows: one for capability, another one for the and an erroneous one for +# 4. 
all PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - at least one AMD compatible GPU is registered within the system and +# - all values match the given regular expression and +# - all values match the given regular expression +# FALSE otherwise +# Notice: regular expression is not valid and will be skipped +# without affecting the PEQT modules' check RESULT (however, an error will be logged out) + + +- name: pcie_act_8 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: + slot_pwr_limit_value: '[a-b][d-' + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #9 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 2. all types of devices +# 3. 2 simple regular expressions, as follows: one for capability and another one for the +# 4. 
all PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - at least one of the AMD compatible GPUs (registered within the system) matches one of the GPU ID in the list and +# - all values match the given regular expression and +# - all values match the given regular expression +# FALSE otherwise + + +- name: pcie_act_9 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #10 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. 3 simple regular expressions, as follows: one for capability, another one for the and one for +# 3. all PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - all values match the given regular expression and +# - all values match the given regular expression and +# - all values match the given regular expression +# FALSE otherwise + + +- name: pcie_act_10 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: ^amdgpu$ + dev_serial_num: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #11 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 3. 3 simple regular expressions, as follows: one for capability, another one for the and one for +# 4. 
all PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - at least one of the AMD compatible GPUs (registered within the system) matches one of the GPU IDs in the list and +# - all link_cap_max_speed values match the given regular expression and +# - all link_stat_cur_speed values match the given regular expression and +# - all kernel_driver values match the given regular expression +# FALSE otherwise + + +- name: pcie_act_11 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: ^amdgpu$ + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #12 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 3. 3 simple regular expressions, as follows: one for the link_cap_max_speed capability, another one for link_stat_cur_speed and one for kernel_driver +# 4. only a subset of PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - at least one of the AMD compatible GPUs (registered within the system) matches one of the GPU IDs in the list and +# - all link_cap_max_speed values match the given regular expression and +# - all link_stat_cur_speed values match the given regular expression and +# - all kernel_driver values match the given regular expression +# FALSE otherwise + + +- name: pcie_act_12 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: + link_stat_cur_speed: '^(\d+ GT\/s)$' + vendor_id: + kernel_driver: ^amdgpu$ + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #13 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 3. 
5 simple regular expressions, as follows: +# - one for PCIe capability +# - one for the PCIe capability +# - one for +# - one for PCIe capability +# - one for PCIe capability +# 4. all PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - at least one of the AMD compatible GPUs (registered within the system) matches one of the GPU ID in the list +# - all values match the given regular expression and +# - all values match the given regular expression and +# - all values match the given regular expression +# - all values match the given regular expression +# - all values match the given regular expression +# FALSE otherwise + + +- name: pcie_act_13 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: ^(x8|x16)$ + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: ^(x8|x16)$ + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: ^amdgpu$ + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: + atomic_op_32_completer: + atomic_op_64_completer: + atomic_op_128_CAS_completer: + +# PEQT test #14 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 3. 6 simple regular expressions, as follows: +# - one for PCIe capability +# - one for the PCIe capability +# - one for +# - one for PCIe capability +# - one for PCIe capability +# - one for PCIe capability +# 4. 
all PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - all values match the given regular expression and +# - all values match the given regular expression and +# - all values match the given regular expression +# - all values match the given regular expression +# - all values match the given regular expression +# - all values match the given regular expression (4 TRUE/FALSE values with whitespace between them) +# FALSE otherwise + + +- name: pcie_act_14 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: ^(x8|x16)$ + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: ^(x8|x16)$ + slot_pwr_limit_value: + slot_physical_num: + deviceid: + vendor_id: + kernel_driver: ^amdgpu$ + dev_serial_num: + D0_Maximum_Power_12V: + D0_Maximum_Power_3_3V: + D0_Sustained_Power_12V: + D0_Sustained_Power_3_3V: + atomic_op_routing: ^((TRUE|FALSE){1})$ + atomic_op_32_completer: ^((TRUE|FALSE){1})$ + atomic_op_64_completer: ^((TRUE|FALSE){1})$ + atomic_op_128_CAS_completer: ^((TRUE|FALSE){1})$ + +# PEQT test #15 +# testing conditions: +# 1. only a subset of AMD compatible GPUs (device filtering) +# 3. 6 simple regular expressions, as follows: +# - one for PCIe capability +# - one for the PCIe capability +# - one for +# - one for PCIe capability +# - one for PCIe capability +# - one for PCIe capability +# 4. 
only a subset of PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - all values match the given regular expression and +# - all values match the given regular expression and +# - all values match the given regular expression +# - all values match the given regular expression +# - all values match the given regular expression +# - all values match the given regular expression (4 TRUE/FALSE values with whitespace between them) +# FALSE otherwise + + +- name: pcie_act_15 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: ^(x8|x16)$ + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: ^(x8|x16)$ + kernel_driver: ^amdgpu$ + atomic_op_routing: ^((TRUE|FALSE){1})$ + atomic_op_32_completer: ^((TRUE|FALSE){1})$ + +# PEQT test #16 +# testing conditions: +# 1. all AMD compatible GPUs +# 3. 6 simple regular expressions, as follows: +# - one for PCIe capability +# - one for the PCIe capability +# - one for +# - one for PCIe capability +# - one for PCIe capability +# - one for PCIe capability +# 4. 
only a subset of PCIe capabilities +# Expected PCIe check RESULT = TRUE if +# - all values match the given regular expression and +# - all values match the given regular expression and +# - all values match the given regular expression +# - all values match the given regular expression +# - all values match the given regular expression +# - all values match the given regular expression (4 TRUE/FALSE values with whitespace between them) +# FALSE otherwise + + +- name: pcie_act_16 + module: peqt + device: all + capability: + link_cap_max_speed: '^(\d+ GT\/s)$' + link_cap_max_width: ^(x8|x16)$ + link_stat_cur_speed: '^(\d+ GT\/s)$' + link_stat_neg_width: ^(x8|x16)$ + kernel_driver: ^amdgpu$ + atomic_op_routing: ^((TRUE|FALSE){1})$ + atomic_op_32_completer: ^((TRUE|FALSE){1})$ + atomic_op_64_completer: ^((TRUE|FALSE){1})$ + atomic_op_128_CAS_completer: ^((TRUE|FALSE){1})$ + +# PEQT test #17 +# testing conditions: +# 1. all AMD compatible GPUs +# 2. all types of devices +# 3. no regular expressions +# 4. bus and slot number +# Expected PCIe check RESULT = TRUE if at least one AMD compatible GPU is registered within the system, FALSE otherwise + + +- name: pcie_act_17 + module: peqt + device: all + capability: + bus_id: + slot_physical_num: diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pesm_1.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pesm_1.conf new file mode 100644 index 0000000000..b6fb3ba921 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pesm_1.conf @@ -0,0 +1,47 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
+# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +# PESM test #1 +# +# Preconditions: +# Set deviceid to the device ID of an existing AMD GPU +# +# Run test with: +# cd bin +# sudo ./rvs -c conf/pesm_1.conf +# +# Expected result: +# Test passes without displaying data for any GPUs +actions: +- name: act1 + device: all + deviceid: 26720 + module: pesm + monitor: true +- name: act2 + device: all + debugwait: 3000 + module: pesm + monitor: false diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/rcqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/rcqt_single.conf new file mode 100644 index 0000000000..17deb944ce --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv31/rcqt_single.conf @@ -0,0 +1,36 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. +# # +# ############################################################################### + +actions: + +- name: action_1 + device: all + module: rcqt + package: rocm-hip-sdk + +- name: action_2 + device: all + module: rcqt + packagelist: rocm-hip-libraries rocm-core rocm-dev rocm-hip-runtime-devel rocm-language-runtime rocm-hip-runtime rocm-hip-sdk rocm-utils rocm-smi-lib rocalution rocm-debug-agent rocm-clang-ocl rocm-device-libs hsa-rocr-devel hipcub-devel rocm-ocl-icd rocsolver rocsparse rocsolver-devel rocminfo hipfft-devel rocm-gdb rocm-dbgapi rocfft hipblas-devel rocthrust-devel openmp-extras comgr rccl rocblas hipblas roctracer-dev hip-doc amdgpu-install rocrand hsa-rocr hipfft hipsparse-devel rocsparse-devel rocrand-devel rocm-opencl hip-devel rocprim-devel hipsolver-devel rocfft-devel hsa-amd-aqlprofile hipify-clang miopen-hip-devel rocm-llvm hip-runtime-amd hip-samples rocalution-devel rccl-devel hipsolver rocprofiler-dev miopen-hip rocm-cmake hipsparse rocblas-devel rocm-opencl-devel diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gpup_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gpup_single.conf new file mode 120000 index 0000000000..1d8580ce60 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gpup_single.conf @@ -0,0 +1 @@ +../nv31/gpup_single.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_single.conf new file mode 100644 index 0000000000..6e5377ae36 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_single.conf @@ -0,0 +1,41 @@ +# ################################################################################ +# # +# # Copyright (c) 2018-2023 
Advanced Micro Devices, Inc. All rights reserved. +# # +# # MIT LICENSE: +# # Permission is hereby granted, free of charge, to any person obtaining a copy of +# # this software and associated documentation files (the "Software"), to deal in +# # the Software without restriction, including without limitation the rights to +# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# # of the Software, and to permit persons to whom the Software is furnished to do +# # so, subject to the following conditions: +# # +# # The above copyright notice and this permission notice shall be included in all +# # copies or substantial portions of the Software. +# # +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# # SOFTWARE. 
+# # +# ############################################################################### + +actions: +- name: gpustress-9000-sgemm-false + device: all + module: gst + parallel: true + count: 1 + duration: 10000 + copy_matrix: false + target_stress: 6000 + matrix_size_a: 8640 + matrix_size_b: 8640 + matrix_size_c: 8640 + ops_type: sgemm + lda: 8640 + ldb: 8640 + ldc: 8640 diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_stress_3_hrs.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_stress_3_hrs.conf new file mode 120000 index 0000000000..45d1697def --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_stress_3_hrs.conf @@ -0,0 +1 @@ +../nv31/gst_stress_3_hrs.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/iet_stress.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/iet_stress.conf new file mode 120000 index 0000000000..db60cd5cc0 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/iet_stress.conf @@ -0,0 +1 @@ +../nv31/iet_stress.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/mem.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/mem.conf new file mode 120000 index 0000000000..f9b78f3b59 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/mem.conf @@ -0,0 +1 @@ +../nv31/mem.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pbqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pbqt_single.conf new file mode 120000 index 0000000000..b113e10ae6 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pbqt_single.conf @@ -0,0 +1 @@ +../nv31/pbqt_single.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pebb_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pebb_single.conf new file mode 120000 index 0000000000..e0dc0ba73d --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pebb_single.conf @@ -0,0 +1 @@ +../nv31/pebb_single.conf \ No newline at end of file diff --git 
a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/peqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/peqt_single.conf new file mode 120000 index 0000000000..8a87ad9b3a --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/peqt_single.conf @@ -0,0 +1 @@ +../nv31/peqt_single.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pesm_1.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pesm_1.conf new file mode 120000 index 0000000000..4066c37636 --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pesm_1.conf @@ -0,0 +1 @@ +../nv31/pesm_1.conf \ No newline at end of file diff --git a/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/rcqt_single.conf b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/rcqt_single.conf new file mode 120000 index 0000000000..9795221fef --- /dev/null +++ b/rdc_libs/rdc_modules/rdc_rvs/conf/nv32/rcqt_single.conf @@ -0,0 +1 @@ +../nv31/rcqt_single.conf \ No newline at end of file