diff --git a/CMakeLists.txt b/CMakeLists.txt index a6265607d1..cccf8c135c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,6 +90,7 @@ set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_power_mon.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_utils.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_counters.cc") +set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_kfd.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.c") set(SMI_INC_LIST "${INC_DIR}/rocm_smi_device.h") @@ -100,6 +101,7 @@ set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_exception.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_counters.h") +set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_kfd.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.h") set(SMI_EXAMPLE_EXE "rocm_smi_ex") diff --git a/docs/ROCm_SMI_Manual.pdf b/docs/ROCm_SMI_Manual.pdf index 5f651d48c3..7f65a42ac1 100644 Binary files a/docs/ROCm_SMI_Manual.pdf and b/docs/ROCm_SMI_Manual.pdf differ diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index 396a3de8aa..f270bcd36d 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -586,6 +586,14 @@ typedef struct { uint64_t uncorrectable_err; //!< Accumulated uncorrectable errors } rsmi_error_count_t; +/** + * @brief This structure contains information specific to a process. 
+ */ +typedef struct { + uint32_t process_id; //!< Process ID + uint32_t pasid; //!< PASID +} rsmi_process_info_t; + /*****************************************************************************/ /** @defgroup InitShutAdmin Initialization and Shutdown * These functions are used for initialization of ROCm SMI and clean up when @@ -1843,6 +1851,70 @@ rsmi_counter_available_counters_get(uint32_t dv_ind, rsmi_event_group_t grp, uint32_t *available); /** @} */ // end of PerfCntr +/*****************************************************************************/ +/** @defgroup SysInfo System Information Functions + * These functions are used to query system-level information, such as + * which processes are currently using a GPU. + * @{ + */ + +/** + * @brief Get process information about processes currently using GPU + * + * @details Given a non-NULL pointer to an array @p procs of + * ::rsmi_process_info_t's, of length *@p num_items, this function will write + * up to *@p num_items instances of ::rsmi_process_info_t to the memory pointed + * to by @p procs. These instances contain information about each process + * utilizing a GPU. If @p procs is not NULL, @p num_items will be updated with + * the number of processes actually written. If @p procs is NULL, @p num_items + * will be updated with the number of processes for which there is current + * process information. Calling this function with @p procs being NULL is a way + * to determine how much memory should be allocated for when @p procs is not + * NULL. + * + * @param[inout] procs a pointer to memory provided by the caller to which + * process information will be written. This may be NULL in which case only @p + * num_items will be updated with the number of processes found. + * + * @param[inout] num_items A pointer to a uint32_t, which on input, should + * contain the amount of memory in ::rsmi_process_info_t's which have been + * provided by the @p procs argument. 
On output, if @p procs is non-NULL, this + * will be updated with the number of ::rsmi_process_info_t structs actually + * written. If @p procs is NULL, this argument will be updated with the number + * of processes for which there is information. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call + * + * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if there were more + * processes for which information was available, but not enough space was + * provided as indicated by @p procs and @p num_items, on input. + */ +rsmi_status_t +rsmi_compute_process_info_get(rsmi_process_info_t *procs, uint32_t *num_items); + +/** + * @brief Get process information about a specific process + * + * @details Given a pointer to an ::rsmi_process_info_t @p proc and a process id + * @p pid, this function will write the process information for @p pid, if + * available, to the memory pointed to by @p proc. + * + * @param[in] pid The process ID for which process information is being requested + * + * @param[inout] proc a pointer to a ::rsmi_process_info_t to which + * process information for @p pid will be written if it is found. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call + * + * @retval ::RSMI_STATUS_NOT_FOUND is returned if there was no process + * information found for the provided @p pid + * + */ +rsmi_status_t +rsmi_compute_process_info_by_pid_get(uint32_t pid, rsmi_process_info_t *proc); + +/** @} */ // end of SysInfo + #ifdef __cplusplus } #endif // __cplusplus diff --git a/include/rocm_smi/rocm_smi_kfd.h b/include/rocm_smi/rocm_smi_kfd.h new file mode 100755 index 0000000000..f9b6440c86 --- /dev/null +++ b/include/rocm_smi/rocm_smi_kfd.h @@ -0,0 +1,60 @@ +/* + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2019, Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ +#ifndef INCLUDE_ROCM_SMI_ROCM_SMI_KFD_H_ +#define INCLUDE_ROCM_SMI_ROCM_SMI_KFD_H_ + +#include "rocm_smi/rocm_smi.h" + +namespace amd { +namespace smi { + +int +GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated, + uint32_t *num_procs_found); +int +GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc); + +} // namespace smi +} // namespace amd + +#endif // INCLUDE_ROCM_SMI_ROCM_SMI_KFD_H_ diff --git a/include/rocm_smi/rocm_smi_utils.h b/include/rocm_smi/rocm_smi_utils.h index 4612fadce4..7b2ccbf113 100755 --- a/include/rocm_smi/rocm_smi_utils.h +++ b/include/rocm_smi/rocm_smi_utils.h @@ -62,6 +62,10 @@ namespace amd { namespace smi { +int SameFile(const std::string fileA, const std::string fileB); +bool FileExists(char const *filename); +int isRegularFile(std::string fname, bool *is_reg); + int ReadSysfsStr(std::string path, std::string *retStr); int WriteSysfsStr(std::string path, std::string val); diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 34d6063c1a..777c298c94 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -62,6 +62,7 @@ #include "rocm_smi/rocm_smi_utils.h" #include "rocm_smi/rocm_smi_exception.h" #include "rocm_smi/rocm_smi_counters.h" +#include "rocm_smi/rocm_smi_kfd.h" #include "rocm_smi/rocm_smi64Config.h" @@ -130,6 +131,7 @@ static pthread_mutex_t *get_mutex(uint32_t dv_ind) { static rsmi_status_t errno_to_rsmi_status(uint32_t err) { switch (err) { case 0: return RSMI_STATUS_SUCCESS; + case ESRCH: return RSMI_STATUS_NOT_FOUND; case EACCES: return RSMI_STATUS_PERMISSION; case EPERM: case ENOENT: return RSMI_STATUS_NOT_SUPPORTED; @@ -2299,3 +2301,51 @@ rsmi_dev_counter_group_supported(uint32_t dv_ind, rsmi_event_group_t group) { CATCH } +rsmi_status_t +rsmi_compute_process_info_get(rsmi_process_info_t *procs, + uint32_t *num_items) { + TRY + + if (num_items == nullptr) { + return RSMI_STATUS_INVALID_ARGS; + } + + uint32_t procs_found = 0; + + int err = amd::smi:: GetProcessInfo(procs, *num_items, 
&procs_found); + + if (err) { + return errno_to_rsmi_status(err); + } + + if (procs && *num_items < procs_found) { + return RSMI_STATUS_INSUFFICIENT_SIZE; + } + if (procs == nullptr || *num_items > procs_found) { + *num_items = procs_found; + } + + return RSMI_STATUS_SUCCESS; + + CATCH +} + +rsmi_status_t +rsmi_compute_process_info_by_pid_get(uint32_t pid, + rsmi_process_info_t *proc) { + TRY + + if (proc == nullptr) { + return RSMI_STATUS_INVALID_ARGS; + } + + int err = amd::smi::GetProcessInfoForPID(pid, proc); + + if (err) { + return errno_to_rsmi_status(err); + } + + return RSMI_STATUS_SUCCESS; + + CATCH +} diff --git a/src/rocm_smi_device.cc b/src/rocm_smi_device.cc index f1896ea8d7..7065d9486f 100755 --- a/src/rocm_smi_device.cc +++ b/src/rocm_smi_device.cc @@ -60,6 +60,7 @@ #include "rocm_smi/rocm_smi_device.h" #include "rocm_smi/rocm_smi.h" #include "rocm_smi/rocm_smi_exception.h" +#include "rocm_smi/rocm_smi_utils.h" extern "C" { #include "shared_mutex.h" // NOLINT @@ -159,20 +160,6 @@ static const std::map kDevPerfLvlMap = { {RSMI_DEV_PERF_LEVEL_UNKNOWN, kDevPerfLevelUnknownStr}, }; -static int isRegularFile(std::string fname, bool *is_reg) { - struct stat file_stat; - int ret; - - assert(is_reg != nullptr); - - ret = stat(fname.c_str(), &file_stat); - if (ret) { - return errno; - } - *is_reg = S_ISREG(file_stat.st_mode); - return 0; -} - #define RET_IF_NONZERO(X) { \ if (X) return X; \ } diff --git a/src/rocm_smi_kfd.cc b/src/rocm_smi_kfd.cc new file mode 100755 index 0000000000..a118b86f73 --- /dev/null +++ b/src/rocm_smi_kfd.cc @@ -0,0 +1,161 @@ +/* + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2019, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "rocm_smi/rocm_smi_kfd.h" +#include "rocm_smi/rocm_smi.h" +#include "rocm_smi/rocm_smi_exception.h" +#include "rocm_smi/rocm_smi_utils.h" + +namespace amd { +namespace smi { + +static const char *kKFDProcPathRoot = "/sys/class/kfd/kfd/proc"; + +// Sysfs file names +static const char *kKFDPasidFName = "pasid"; + +static bool is_number(const std::string &s) { + return !s.empty() && std::all_of(s.begin(), s.end(), ::isdigit); +} + +// Fills procs (up to num_allocated); *num_procs_found counts every PID seen. +int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated, + uint32_t *num_procs_found) { + assert(num_procs_found != nullptr); + *num_procs_found = 0; + errno = 0; + auto proc_dir = opendir(kKFDProcPathRoot); + const int open_err = errno; // perror() below may overwrite errno + if (proc_dir == nullptr) { + perror("Unable to open process directory"); + return open_err; + } + auto dentry = readdir(proc_dir); + + std::string prod_id_str; + + while (dentry != nullptr) { + if (dentry->d_name[0] == '.') { // skip ".", ".." and hidden entries + dentry = readdir(proc_dir); + continue; + } + + prod_id_str = dentry->d_name; + assert(is_number(prod_id_str) && "Unexpected file name in kfd/proc dir"); + if (!is_number(prod_id_str)) { + dentry = readdir(proc_dir); // advance, or this loops forever + continue; + } + if (procs && *num_procs_found < num_allocated) { + std::string tmp; + + procs[*num_procs_found].process_id = std::stoi(prod_id_str); + + std::string pasid_str_path = kKFDProcPathRoot; + pasid_str_path += "/"; + pasid_str_path += prod_id_str; + pasid_str_path += "/"; + pasid_str_path += kKFDPasidFName; + + int err = ReadSysfsStr(pasid_str_path, &tmp); + if (err) { + closedir(proc_dir); // do not leak the DIR stream on failure + return err; + } + assert(is_number(tmp) && "Unexpected value in pasid file"); + procs[*num_procs_found].pasid = std::stoi(tmp); + } + ++(*num_procs_found); + + dentry = readdir(proc_dir); + } + + errno = 0; + if (closedir(proc_dir)) { + return errno; + } + return 0; +} + +int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc) { + assert(proc != nullptr); + int err; + 
std::string tmp; + + std::string proc_str_path = kKFDProcPathRoot; + proc_str_path += "/"; + proc_str_path += std::to_string(pid); + + if (!FileExists(proc_str_path.c_str())) { + return ESRCH; + } + proc->process_id = pid; + + std::string pasid_str_path = proc_str_path; + pasid_str_path += "/"; + pasid_str_path += kKFDPasidFName; + + err = ReadSysfsStr(pasid_str_path, &tmp); + if (err) { + return err; + } + assert(is_number(tmp) && "Unexpected value in pasid file"); + proc->pasid = std::stoi(tmp); + + return 0; +} + +} // namespace smi +} // namespace amd diff --git a/src/rocm_smi_main.cc b/src/rocm_smi_main.cc index f07871c1ae..39ef5c1dce 100755 --- a/src/rocm_smi_main.cc +++ b/src/rocm_smi_main.cc @@ -39,7 +39,6 @@ * DEALINGS WITH THE SOFTWARE. * */ -#include #include #include #include @@ -61,6 +60,7 @@ #include "rocm_smi/rocm_smi_device.h" #include "rocm_smi/rocm_smi_main.h" #include "rocm_smi/rocm_smi_exception.h" +#include "rocm_smi/rocm_smi_utils.h" static const char *kPathDRMRoot = "/sys/class/drm"; static const char *kPathHWMonRoot = "/sys/class/hwmon"; @@ -73,10 +73,6 @@ static const char *kAMDMonitorTypes[] = {"radeon", "amdgpu", ""}; namespace amd { namespace smi { -static bool FileExists(char const *filename) { - struct stat buf; - return (stat(filename, &buf) == 0); -} static uint32_t GetDeviceIndex(const std::string s) { std::string t = s; @@ -86,33 +82,6 @@ static uint32_t GetDeviceIndex(const std::string s) { return stoi(t); } -// Return 0 if same file, 1 if not, and -1 for error -static int SameFile(const std::string fileA, const std::string fileB) { - struct stat aStat; - struct stat bStat; - int ret; - - ret = stat(fileA.c_str(), &aStat); - if (ret) { - return -1; - } - - ret = stat(fileB.c_str(), &bStat); - if (ret) { - return -1; - } - - if (aStat.st_dev != bStat.st_dev) { - return 1; - } - - if (aStat.st_ino != bStat.st_ino) { - return 1; - } - - return 0; -} - static int SameDevice(const std::string fileA, const std::string fileB) { return 
SameFile(fileA + "/device", fileB + "/device"); } diff --git a/src/rocm_smi_utils.cc b/src/rocm_smi_utils.cc index a069dfd822..6a150faf33 100755 --- a/src/rocm_smi_utils.cc +++ b/src/rocm_smi_utils.cc @@ -42,6 +42,7 @@ */ #include #include +#include #include #include @@ -53,6 +54,52 @@ namespace amd { namespace smi { +// Return 0 if same file, 1 if not, and -1 for error +int SameFile(const std::string fileA, const std::string fileB) { + struct stat aStat; + struct stat bStat; + int ret; + + ret = stat(fileA.c_str(), &aStat); + if (ret) { + return -1; + } + + ret = stat(fileB.c_str(), &bStat); + if (ret) { + return -1; + } + + if (aStat.st_dev != bStat.st_dev) { + return 1; + } + + if (aStat.st_ino != bStat.st_ino) { + return 1; + } + + return 0; +} + +bool FileExists(char const *filename) { + struct stat buf; + return (stat(filename, &buf) == 0); +} + +int isRegularFile(std::string fname, bool *is_reg) { + struct stat file_stat; + int ret; + + assert(is_reg != nullptr); + + ret = stat(fname.c_str(), &file_stat); + if (ret) { + return errno; + } + *is_reg = S_ISREG(file_stat.st_mode); + return 0; +} + int WriteSysfsStr(std::string path, std::string val) { std::ofstream fs; int ret = 0; diff --git a/tests/rocm_smi_test/functional/mem_util_read.cc b/tests/rocm_smi_test/functional/mem_util_read.cc index 1c2966fdd1..6c076c52c5 100755 --- a/tests/rocm_smi_test/functional/mem_util_read.cc +++ b/tests/rocm_smi_test/functional/mem_util_read.cc @@ -94,7 +94,6 @@ void TestMemUtilRead::Run(void) { rsmi_status_t err; uint64_t total; uint64_t usage; - uint32_t mem_busy_percent; TestBase::Run(); diff --git a/tests/rocm_smi_test/functional/process_info_read.cc b/tests/rocm_smi_test/functional/process_info_read.cc new file mode 100755 index 0000000000..382e1bac8b --- /dev/null +++ b/tests/rocm_smi_test/functional/process_info_read.cc @@ -0,0 +1,181 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + 
* ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2019, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include +#include + +#include +#include + +#include "gtest/gtest.h" +#include "rocm_smi/rocm_smi.h" +#include "rocm_smi_test/functional/process_info_read.h" +#include "rocm_smi_test/test_common.h" + +TestProcInfoRead::TestProcInfoRead() : TestBase() { + set_title("RSMI Process Info Read Test"); + set_description("This test verifies that process information such as the " + "process ID, PASID, etc. can be read properly."); +} + +TestProcInfoRead::~TestProcInfoRead(void) { +} + +void TestProcInfoRead::SetUp(void) { + TestBase::SetUp(); + + return; +} + +void TestProcInfoRead::DisplayTestInfo(void) { + TestBase::DisplayTestInfo(); +} + +void TestProcInfoRead::DisplayResults(void) const { + TestBase::DisplayResults(); + return; +} + +void TestProcInfoRead::Close() { + // This will close handles opened within rsmitst utility calls and call + // rsmi_shut_down(), so it should be done after other hsa cleanup + TestBase::Close(); +} + +static void dumpProcess(rsmi_process_info_t *p) { + assert(p != nullptr); + std::cout << "ProcessID: " << p->process_id << " "; + std::cout << "PASID: " << p->pasid << " "; + std::cout << std::endl; +} +void TestProcInfoRead::Run(void) { + rsmi_status_t err; + uint32_t num_proc_found; + uint32_t val_ui32; + rsmi_process_info_t *procs = nullptr; + + TestBase::Run(); + + err = rsmi_compute_process_info_get(nullptr, &num_proc_found); + if (err != RSMI_STATUS_SUCCESS) { + if (err == RSMI_STATUS_FILE_ERROR) { + IF_VERB(STANDARD) { + std::cout << "\t**Process info. 
read: Not supported on this machine" + << std::endl; + } + return; // num_proc_found was never written; stop even when not verbose + } else { + CHK_ERR_ASRT(err) + } + } else { + IF_VERB(STANDARD) { + std::cout << "\t** " << std::dec << num_proc_found << + " GPU processes found" << std::endl; + } + } + + if (num_proc_found == 0) { + return; + } + procs = new rsmi_process_info_t[num_proc_found]; + + val_ui32 = num_proc_found; + err = rsmi_compute_process_info_get(procs, &val_ui32); + if (err != RSMI_STATUS_SUCCESS) { + if (err == RSMI_STATUS_INSUFFICIENT_SIZE) { + IF_VERB(STANDARD) { + std::cout << "\t** " << val_ui32 << + " processes were read, but more became available that were unread." + << std::endl; + for (uint32_t i = 0; i < val_ui32; ++i) { + dumpProcess(&procs[i]); + } + } + delete []procs; // early exit must not leak the buffer + return; + } else { + CHK_ERR_ASRT(err) + } + } else { + IF_VERB(STANDARD) { + std::cout << "\t** Processes currently using GPU: " << std::endl; + for (uint32_t i = 0; i < val_ui32; ++i) { + dumpProcess(&procs[i]); + } + } + } + + // Reset to the number we actually read + num_proc_found = val_ui32; + if (num_proc_found) { + rsmi_process_info_t proc_info; + + err = rsmi_compute_process_info_by_pid_get(procs[0].process_id, + &proc_info); + if (err == RSMI_STATUS_NOT_FOUND) { + std::cout << + "\t** WARNING: rsmi_compute_process_info_get() found process " << + procs[0].process_id << ", but subsequently, " + "rsmi_compute_process_info_by_pid_get() did not" + " find this same process." 
<< std::endl; + } else { + CHK_ERR_ASRT(err) + ASSERT_EQ(proc_info.process_id, procs[0].process_id); + ASSERT_EQ(proc_info.pasid, procs[0].pasid); + } + } + if (num_proc_found > 1) { + rsmi_process_info_t tmp_proc; + val_ui32 = 1; + err = rsmi_compute_process_info_get(&tmp_proc, &val_ui32); + + if (err != RSMI_STATUS_INSUFFICIENT_SIZE) { + std::cout << "Expected rsmi_compute_process_info_get() to tell us" + " there are more processes available, but instead go return code " << + err << std::endl; + } + } + delete []procs; +} diff --git a/tests/rocm_smi_test/functional/process_info_read.h b/tests/rocm_smi_test/functional/process_info_read.h new file mode 100755 index 0000000000..9640fa5cd0 --- /dev/null +++ b/tests/rocm_smi_test/functional/process_info_read.h @@ -0,0 +1,73 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2019, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. 
+ * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ +#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_ +#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_ + +#include "rocm_smi_test/test_base.h" + +class TestProcInfoRead : public TestBase { + public: + TestProcInfoRead(); + + // @Brief: Destructor for test case of TestProcInfoRead + virtual ~TestProcInfoRead(); + + // @Brief: Setup the environment for measurement + virtual void SetUp(); + + // @Brief: Core measurement execution + virtual void Run(); + + // @Brief: Clean up and retrive the resource + virtual void Close(); + + // @Brief: Display results + virtual void DisplayResults() const; + + // @Brief: Display information about what this test does + virtual void DisplayTestInfo(void); +}; + +#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_PROCESS_INFO_READ_H_ diff --git a/tests/rocm_smi_test/main.cc b/tests/rocm_smi_test/main.cc index ca7eacdecd..afce6643e7 100755 --- a/tests/rocm_smi_test/main.cc +++ b/tests/rocm_smi_test/main.cc @@ -72,7 +72,8 @@ #include "functional/err_cnt_read.h" #include "functional/mem_util_read.h" #include 
"functional/id_info_read.h" -#include "rocm_smi_test/functional/perf_cntr_read_write.h" +#include "functional/perf_cntr_read_write.h" +#include "functional/process_info_read.h" static RSMITstGlobals *sRSMIGlvalues = nullptr; @@ -199,10 +200,15 @@ TEST(rsmitstReadOnly, TestIdInfoRead) { TestIdInfoRead tst; RunGenericTest(&tst); } -TEST(rsmitstreadWrite, TestPerfCntrReadWrite) { +TEST(rsmitstReadWrite, TestPerfCntrReadWrite) { TestPerfCntrReadWrite tst; RunGenericTest(&tst); } +TEST(rsmitstReadOnly, TestProcInfoRead) { + TestProcInfoRead tst; + RunGenericTest(&tst); +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv);