Merge pull request #34 from cfreehill/master

Added rsmi_dev_pci_replay_counter_get()

[ROCm/rocm_smi_lib commit: caf2748398]
Этот коммит содержится в:
Chris Freehill
2019-05-06 13:28:53 -05:00
коммит произвёл GitHub
родитель 8bc9fb16e7 2a87e5bc44
Коммит 3901643427
11 изменённых файлов: 88 добавлений и 21 удалений
+18
Просмотреть файл
@@ -744,6 +744,24 @@ rsmi_status_t rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid);
rsmi_status_t rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent,
uint64_t *received, uint64_t *max_pkt_sz);
/**
* @brief Get PCIe replay counter
*
* @details Given a device index @p dv_ind and a pointer to a uint64_t @p
* counter, this function will write the sum of the number of NAKs received
* by the GPU and the NAKs generated by the GPU to the memory pointed to by
* @p counter.
*
* @param[in] dv_ind a device index
*
* @param[inout] counter a pointer to a uint64_t to which the sum of the NAKs
* received and generated by the GPU is written.
* NOTE(review): presumably must be non-null -- confirm against the
* implementation.
*
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
*
* @note NOTE(review): other status codes (e.g. ::RSMI_STATUS_NOT_SUPPORTED
* when the counter is unavailable on the machine) appear possible but are not
* listed here -- confirm against the implementation.
*/
rsmi_status_t rsmi_dev_pci_replay_counter_get(uint32_t dv_ind,
uint64_t *counter);
/** @} */ // end of PCIeQuer
/*****************************************************************************/
/** @defgroup PCIeCont PCIe Control
+3 -1
Просмотреть файл
@@ -56,7 +56,7 @@
#include "rocm_smi/rocm_smi_common.h"
#include "rocm_smi/rocm_smi.h"
extern "C" {
#include "shared_mutex.h"
#include "shared_mutex.h" // NOLINT
};
namespace amd {
@@ -90,6 +90,7 @@ enum DevInfoTypes {
kDevMemUsedGTT,
kDevMemUsedVisVRAM,
kDevMemUsedVRAM,
kDevPCIEReplayCount,
};
class Device {
@@ -116,6 +117,7 @@ class Device {
void set_bdfid(uint64_t val) {bdfid_ = val;}
uint64_t get_bdfid(void) const {return bdfid_;}
pthread_mutex_t *mutex(void) {return mutex_.ptr;}
private:
std::shared_ptr<Monitor> monitor_;
std::shared_ptr<PowerMon> power_monitor_;
+6 -6
Просмотреть файл
@@ -66,23 +66,23 @@ int ReadSysfsStr(std::string path, std::string *retStr);
int WriteSysfsStr(std::string path, std::string val);
// Thin wrapper that holds a reference to an existing pthread mutex and
// exposes lock/unlock as Acquire()/Release(). The wrapper only stores a
// reference; it does not create or destroy the mutex.
struct pthread_wrap {
public:
pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {}
public:
// Binds this wrapper to the caller-supplied mutex p_mut.
explicit pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {}
// Locks the wrapped mutex; the pthread_mutex_lock() result is not checked.
void Acquire() { pthread_mutex_lock(&mutex_); }
// Unlocks the wrapped mutex; the pthread_mutex_unlock() result is not
// checked.
void Release() { pthread_mutex_unlock(&mutex_); }
private:
private:
// Reference to the mutex passed to the constructor.
pthread_mutex_t& mutex_;
};
struct ScopedPthread {
ScopedPthread(pthread_wrap& mutex) : pthrd_ref_(mutex) {
explicit ScopedPthread(pthread_wrap& mutex) : pthrd_ref_(mutex) {
pthrd_ref_.Acquire();
};
}
~ScopedPthread() {
pthrd_ref_.Release();
}
private:
private:
ScopedPthread(const ScopedPthread&);
pthread_wrap& pthrd_ref_;
+15 -2
Просмотреть файл
@@ -120,8 +120,8 @@ static rsmi_status_t errno_to_rsmi_status(uint32_t err) {
switch (err) {
case 0: return RSMI_STATUS_SUCCESS;
case EACCES: return RSMI_STATUS_PERMISSION;
case EPERM: return RSMI_STATUS_NOT_SUPPORTED;
case ENOENT:
case EPERM:
case ENOENT: return RSMI_STATUS_NOT_SUPPORTED;
case EISDIR: return RSMI_STATUS_FILE_ERROR;
default: return RSMI_STATUS_UNKNOWN_ERROR;
}
@@ -2044,3 +2044,16 @@ rsmi_version_str_get(rsmi_sw_component_t component, char *ver_str,
CATCH
}
// Reads the PCIe replay counter for device dv_ind into *counter.
// The work is delegated to get_dev_value_int() with the
// amd::smi::kDevPCIEReplayCount attribute, and that helper's status is
// returned unchanged. TRY/CATCH and DEVICE_MUTEX are project macros defined
// elsewhere in this file.
rsmi_status_t
rsmi_dev_pci_replay_counter_get(uint32_t dv_ind, uint64_t *counter) {
  TRY
  DEVICE_MUTEX
  return get_dev_value_int(amd::smi::kDevPCIEReplayCount, dv_ind, counter);
  CATCH
}
+4 -1
Просмотреть файл
@@ -96,6 +96,7 @@ static const char *kDevMemTotVRAMFName = "mem_info_vram_total";
static const char *kDevMemUsedGTTFName = "mem_info_gtt_used";
static const char *kDevMemUsedVisVRAMFName = "mem_info_vis_vram_used";
static const char *kDevMemUsedVRAMFName = "mem_info_vram_used";
static const char *kDevPCIEReplayCountFName = "pcie_replay_count";
// Strings that are found within sysfs files
static const char *kDevPerfLevelAutoStr = "auto";
@@ -136,6 +137,7 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
{kDevMemUsedGTT, kDevMemUsedGTTFName},
{kDevMemUsedVisVRAM, kDevMemUsedVisVRAMFName},
{kDevMemUsedVRAM, kDevMemUsedVRAMFName},
{kDevPCIEReplayCount, kDevPCIEReplayCountFName},
};
static const std::map<rsmi_dev_perf_level, const char *> kDevPerfLvlMap = {
@@ -202,7 +204,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
DBG_FILE_ERROR(sysfs_path, str);
if (!isRegularFile(sysfs_path)) {
return EISDIR;
return ENOENT;
}
fs->open(sysfs_path);
@@ -367,6 +369,7 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
case kDevMemUsedGTT:
case kDevMemUsedVisVRAM:
case kDevMemUsedVRAM:
case kDevPCIEReplayCount:
ret = readDevInfoStr(type, &tempStr);
RET_IF_NONZERO(ret);
*val = std::stoul(tempStr, 0);
+16 -2
Просмотреть файл
@@ -8,6 +8,7 @@
#include <stdio.h> // perror
#include <stdlib.h> // malloc, free
#include <string.h> // strcpy
#include <time.h> // clock_gettime
shared_mutex_t shared_mutex_init(const char *name, mode_t mode) {
shared_mutex_t mutex = {NULL, 0, NULL, 0};
@@ -51,7 +52,17 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode) {
return mutex;
}
if (mutex.created == 0 && ((shared_mutex_t *)addr)->ptr == NULL) {
pthread_mutex_t *mutex_ptr = (pthread_mutex_t *)addr;
// Make sure the mutex wasn't left in a locked state. If we can't
// acquire it in 3 sec., re-do everything.
struct timespec expireTime;
clock_gettime(CLOCK_REALTIME, &expireTime);
expireTime.tv_sec += 3;
int ret = pthread_mutex_timedlock(mutex_ptr, &expireTime);
if (ret || (mutex.created == 0 && ((shared_mutex_t *)addr)->ptr == NULL)) {
// Something is out of sync. Unlink shm and start over.
if (shm_unlink(name)) {
mutex.shm_fd = 0;
@@ -60,9 +71,12 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode) {
free(mutex.name);
return shared_mutex_init(name, mode);
} else {
if (pthread_mutex_unlock(mutex_ptr)) {
perror("pthread_mutex_unlock");
}
}
pthread_mutex_t *mutex_ptr = (pthread_mutex_t *)addr;
if (mutex.created) {
pthread_mutexattr_t attr;
+3
Просмотреть файл
@@ -1,3 +1,6 @@
// NOLINT(legal/copyright)
// See LICENSE file
#ifndef SRC_SHARED_MUTEX_SHARED_MUTEX_H_
#define SRC_SHARED_MUTEX_SHARED_MUTEX_H_
+1 -1
Просмотреть файл
@@ -96,7 +96,7 @@ void TestErrCntRead::Run(void) {
err = rsmi_dev_ecc_enabled_get(i, &enabled_mask);
if (err == RSMI_STATUS_NOT_SUPPORTED) {
std::cout <<
"\t**Error Count Enabled Mask for is not supported on this machine"
"\t**Error Count Enabled Mask get is not supported on this machine"
<< std::endl;
} else {
CHK_ERR_ASRT(err)
+20 -6
Просмотреть файл
@@ -89,13 +89,26 @@ void TestPciReadWrite::Run(void) {
rsmi_status_t ret;
rsmi_pcie_bandwidth_t bw;
uint32_t freq_bitmask;
uint64_t sent, received, max_pkt_sz;
uint64_t sent, received, max_pkt_sz, u64int;
TestBase::Run();
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
PrintDeviceHeader(dv_ind);
ret = rsmi_dev_pci_replay_counter_get(dv_ind, &u64int);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
std::cout <<
"\t**rsmi_dev_pci_replay_counter_get() is not supported"
" on this machine" << std::endl;
} else {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\tPCIe Replay Counter: " << u64int << std::endl;
}
}
ret = rsmi_dev_pci_throughput_get(dv_ind, &sent, &received, &max_pkt_sz);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
std::cout << "TEST FAILURE: Current PCIe throughput is not detected. "
@@ -106,7 +119,7 @@ void TestPciReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "PCIe Throughput (1 sec.): " << std::endl;
std::cout << "\tPCIe Throughput (1 sec.): " << std::endl;
std::cout << "\t\tSent: " << sent << " bytes" << std::endl;
std::cout << "\t\tReceived: " << received << " bytes" << std::endl;
std::cout << "\t\tMax Packet Size: " << max_pkt_sz << " bytes" <<
@@ -125,7 +138,8 @@ void TestPciReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "Initial PCIe is " << bw.transfer_rate.current << std::endl;
std::cout << "\tInitial PCIe is " << bw.transfer_rate.current <<
std::endl;
}
// First set the bitmask to all supported bandwidths
@@ -141,7 +155,7 @@ void TestPciReadWrite::Run(void) {
freq_bm_str.size()-1));
IF_VERB(STANDARD) {
std::cout << "Setting bandwidth mask to " << "0b" << freq_bm_str <<
std::cout << "\tSetting bandwidth mask to " << "0b" << freq_bm_str <<
" ..." << std::endl;
}
ret = rsmi_dev_pci_bandwidth_set(dv_ind, freq_bitmask);
@@ -151,9 +165,9 @@ void TestPciReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "Bandwidth is now index " << bw.transfer_rate.current <<
std::cout << "\tBandwidth is now index " << bw.transfer_rate.current <<
std::endl;
std::cout << "Resetting mask to all bandwidths." << std::endl;
std::cout << "\tResetting mask to all bandwidths." << std::endl;
}
ret = rsmi_dev_pci_bandwidth_set(dv_ind, 0xFFFFFFFF);
CHK_ERR_ASRT(ret)
+1 -1
Просмотреть файл
@@ -99,7 +99,7 @@ void TestTempRead::Run(void) {
err = rsmi_dev_temp_metric_get(i, 0, met, &val_i64);
if (err != RSMI_STATUS_SUCCESS) {
if (err == RSMI_STATUS_FILE_ERROR) {
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << label << ": " <<
"Not supported on this machine" << std::endl;
+1 -1
Просмотреть файл
@@ -155,7 +155,7 @@ void TestVoltCurvRead::Run(void) {
err = rsmi_dev_od_volt_info_get(i, &odv);
if (err == RSMI_STATUS_FILE_ERROR ||
err == RSMI_STATUS_NOT_YET_IMPLEMENTED) {
err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
"\t**rsmi_dev_od_volt_info_get: Not supported on this machine"