Update XGMI perf counter test to show utilization

Also:
* When destroying a counter, make sure to stop the counter first
* In the test, do not stop (disable) the counter before
  reading it.
* Clean up some whitespace in other tests
* Re-add manual pdf file

Change-Id: I0786ef3a994ca568299c77e44f092af8943ac33d


[ROCm/rocm_smi_lib commit: f946ea37ef]
Этот коммит содержится в:
Chris Freehill
2020-06-02 11:54:36 -05:00
родитель ef34c94574
Коммит b07fd8fca7
9 изменённых файлов: 227 добавлений и 103 удалений
Двоичный файл не отображается.
+100 -6
Просмотреть файл
@@ -244,7 +244,9 @@ typedef enum {
*/
typedef enum {
RSMI_CNTR_CMD_START = 0, //!< Start the counter
RSMI_CNTR_CMD_STOP, //!< Stop the counter
RSMI_CNTR_CMD_STOP, //!< Stop the counter; note that this should not
//!< be used before reading. It is for temporarily
//!< disabling the counter.
} rsmi_counter_command_t;
/**
@@ -525,11 +527,11 @@ typedef enum {
* @brief Types for IO Link
*/
typedef enum _RSMI_IO_LINK_TYPE {
RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type.
RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express
RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI
RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types
RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types
RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type.
RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express
RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI
RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types
RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types
} RSMI_IO_LINK_TYPE;
/**
@@ -2333,6 +2335,98 @@ rsmi_status_string(rsmi_status_t status, const char **status_string);
/** @defgroup PerfCntr Performance Counter Functions
* These functions are used to configure, query and control performance
* counting.
*
* These functions use the same mechanisms as the "perf" command line
* utility. They share the same underlying resources and have some similarities
* in how they are used. The events supported by this API should have
* corresponding perf events that can be seen with "perf stat ...". The events
* supported by perf can be seen with "perf list".
*
* The types of events available and the ability to count those
* events are dependent on which device is being targeted and if counters are
* still available for that device, respectively.
* ::rsmi_dev_counter_group_supported() can be used to see which event types
* (::rsmi_event_group_t) are supported for a given device. Assuming a device
* supports a given event type, we can then check to see if there are counters
* available to count a specific event with
* ::rsmi_counter_available_counters_get(). Counters may be occupied by other
* perf based programs.
*
* Once it is determined that events are supported and counters are available,
* an event counter can be created/destroyed and controlled.
*
* ::rsmi_dev_counter_create() allocates internal data structures that will be
* used to control the event counter, and returns a handle to this data
* structure.
*
* Once an event counter handle is obtained, the event counter can be
* controlled (i.e., started, stopped,...) with ::rsmi_counter_control() by
* passing ::rsmi_counter_command_t commands. ::RSMI_CNTR_CMD_START starts an
* event counter and ::RSMI_CNTR_CMD_STOP stops a counter.
* ::rsmi_counter_read() reads an event counter.
*
* Once the counter is no longer needed, the resources it uses should be freed
* by calling ::rsmi_dev_counter_destroy().
*
*
* Important Notes about Counter Values
* ====================================
* - A running "absolute" counter is kept internally. For the discussion that
* follows, we will call the internal counter value at time \a t \a
* val<sub>t</sub>
* - Issuing ::RSMI_CNTR_CMD_START or calling ::rsmi_counter_read() causes
* RSMI (in kernel) to internally record the current absolute counter value
* - ::rsmi_counter_read() returns the number of events that have occurred
* since the previously recorded value (i.e., a relative value,
* \a val<sub>t</sub> - val<sub>t-1</sub>) from the issuing of
* ::RSMI_CNTR_CMD_START or calling ::rsmi_counter_read()
*
* Example of event counting sequence:
*
* \latexonly
* \pagebreak
* \endlatexonly
* \code{.cpp}
*
* rsmi_counter_value_t value;
*
* // Determine if RSMI_EVNT_GRP_XGMI is supported for device dv_ind
* ret = rsmi_dev_counter_group_supported(dv_ind, RSMI_EVNT_GRP_XGMI);
*
* // See if there are counters available for device dv_ind for event
* // RSMI_EVNT_GRP_XGMI
*
* ret = rsmi_counter_available_counters_get(dv_ind,
* RSMI_EVNT_GRP_XGMI, &counters_available);
*
* // Assuming RSMI_EVNT_GRP_XGMI is supported and there is at least 1
* // counter available for RSMI_EVNT_GRP_XGMI on device dv_ind, create
* // an event object and get the handle (rsmi_event_handle_t).
*
* ret = rsmi_dev_counter_create(dv_ind, RSMI_EVNT_GRP_XGMI, &evnt_handle);
*
* // A program that generates the events of interest can be started
* // immediately before or after starting the counters.
* // Start counting:
* ret = rsmi_counter_control(evnt_handle, RSMI_CNTR_CMD_START, NULL);
*
* // Wait...
*
* // Get the number of events since RSMI_CNTR_CMD_START was issued:
* ret = rsmi_counter_read(evnt_handle, &value);
*
* // Wait...
*
* // Get the number of events since rsmi_counter_read() was last called:
* ret = rsmi_counter_read(evnt_handle, &value);
*
* // Stop counting.
* ret = rsmi_counter_control(evnt_handle, RSMI_CNTR_CMD_STOP, NULL);
*
* // Release all resources (e.g., counter and memory resources) associated
* // with evnt_handle.
* ret = rsmi_dev_counter_destroy(evnt_handle);
* \endcode
* @{
*/
+1
Просмотреть файл
@@ -48,6 +48,7 @@
#include <unordered_set>
#include <memory>
#include <map>
#include <utility>
#include "rocm_smi/rocm_smi.h"
+1
Просмотреть файл
@@ -54,6 +54,7 @@
#include <unordered_map>
#include <map>
#include <mutex> // NOLINT
#include <utility>
#include "rocm_smi/rocm_smi_io_link.h"
#include "rocm_smi/rocm_smi_kfd.h"
+11 -2
Просмотреть файл
@@ -566,7 +566,7 @@ rsmi_init(uint64_t flags) {
smi.Initialize(flags);
} catch(...) {
smi.Cleanup();
throw;
throw amd::smi::rsmi_exception(RSMI_STATUS_INIT_ERROR, __FUNCTION__);
}
}
refGuard.Dismiss();
@@ -2663,14 +2663,17 @@ rsmi_dev_counter_destroy(rsmi_event_handle_t evnt_handle) {
return RSMI_STATUS_INVALID_ARGS;
}
uint32_t ret = 0;
amd::smi::evt::Event *evt =
reinterpret_cast<amd::smi::evt::Event *>(evnt_handle);
uint32_t dv_ind = evt->dev_ind();
DEVICE_MUTEX
REQUIRE_ROOT_ACCESS
ret = evt->stopCounter();
delete evt;
return RSMI_STATUS_SUCCESS;
return errno_to_rsmi_status(ret);;
CATCH
}
@@ -2730,6 +2733,12 @@ rsmi_counter_read(rsmi_event_handle_t evt_handle,
ret = evt->getValue(value);
// If value >= 2^48, then an overflow has occurred. We need to discard this
// value and re-read:
if (ret == 0 && value->value > 0xFFFFFFFFFFFF) {
ret = evt->getValue(value);
}
return errno_to_rsmi_status(ret);
CATCH
}
+1
Просмотреть файл
@@ -315,6 +315,7 @@ amd::smi::evt::Event::openPerfHandle(void) {
attr_.size = sizeof(struct perf_event_attr);
attr_.config = get_perf_attr_config(&event_info_);
attr_.sample_type = PERF_SAMPLE_IDENTIFIER;
attr_.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
attr_.disabled = 1;
+32 -21
Просмотреть файл
@@ -48,6 +48,7 @@
#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "rocm_smi/rocm_smi.h"
@@ -62,7 +63,8 @@ typedef struct {
TestHWTopologyRead::TestHWTopologyRead() : TestBase() {
set_title("RSMI Hardware Topology Read Test");
set_description("This test verifies that Hardware Topology can be read properly.");
set_description(
"This test verifies that Hardware Topology can be read properly.");
}
TestHWTopologyRead::~TestHWTopologyRead(void) {
@@ -103,16 +105,20 @@ void TestHWTopologyRead::Run(void) {
err = rsmi_num_monitor_devices(&num_devices);
CHK_ERR_ASRT(err)
gpu_link_t gpu_links[num_devices][num_devices];
uint32_t numa_numbers[num_devices];
// gpu_link_t gpu_links[num_devices][num_devices];
std::vector<std::vector<gpu_link_t>> gpu_links(num_devices,
std::vector<gpu_link_t>(num_devices));
// uint32_t numa_numbers[num_devices];
std::vector<uint32_t> numa_numbers(num_devices);
for (uint32_t dv_ind=0; dv_ind<num_devices; dv_ind++) {
for (uint32_t dv_ind = 0; dv_ind < num_devices; ++dv_ind) {
err = rsmi_topo_get_numa_node_number(dv_ind, &numa_numbers[dv_ind]);
if (err != RSMI_STATUS_SUCCESS) {
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**Numa Node Number. read: Not supported on this machine"
<< std::endl;
std::cout <<
"\t**Numa Node Number. read: Not supported on this machine" <<
std::endl;
return;
}
} else {
@@ -121,9 +127,9 @@ void TestHWTopologyRead::Run(void) {
}
}
for (uint32_t dv_ind_src=0; dv_ind_src<num_devices; dv_ind_src++) {
for (uint32_t dv_ind_dst=0; dv_ind_dst<num_devices; dv_ind_dst++) {
if(dv_ind_src == dv_ind_dst) {
for (uint32_t dv_ind_src = 0; dv_ind_src < num_devices; dv_ind_src++) {
for (uint32_t dv_ind_dst = 0; dv_ind_dst < num_devices; dv_ind_dst++) {
if (dv_ind_src == dv_ind_dst) {
gpu_links[dv_ind_src][dv_ind_dst].type = "X";
gpu_links[dv_ind_src][dv_ind_dst].hops = 0;
gpu_links[dv_ind_src][dv_ind_dst].weight = 0;
@@ -134,8 +140,9 @@ void TestHWTopologyRead::Run(void) {
if (err != RSMI_STATUS_SUCCESS) {
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**Link Type. read: Not supported on this machine"
<< std::endl;
std::cout <<
"\t**Link Type. read: Not supported on this machine"
<< std::endl;
return;
}
} else {
@@ -153,15 +160,18 @@ void TestHWTopologyRead::Run(void) {
default:
gpu_links[dv_ind_src][dv_ind_dst].type = "XXXX";
std::cout << "\t**Invalid IO LINK type. type=" << type << std::endl;
std::cout << "\t**Invalid IO LINK type. type=" << type <<
std::endl;
}
}
err = rsmi_topo_get_link_weight(dv_ind_src, dv_ind_dst, &gpu_links[dv_ind_src][dv_ind_dst].weight);
err = rsmi_topo_get_link_weight(dv_ind_src, dv_ind_dst,
&gpu_links[dv_ind_src][dv_ind_dst].weight);
if (err != RSMI_STATUS_SUCCESS) {
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**Link Weight. read: Not supported on this machine"
<< std::endl;
std::cout <<
"\t**Link Weight. read: Not supported on this machine"
<< std::endl;
return;
}
} else {
@@ -197,10 +207,11 @@ void TestHWTopologyRead::Run(void) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(6) << std::left << tmp;
for (j = 0; j < num_devices; j++) {
if(i == j)
if (i == j) {
std::cout << std::setw(12) << std::left << "X";
else
} else {
std::cout << std::setw(12) << std::left << gpu_links[i][j].type;
}
}
std::cout << std::endl;
}
@@ -218,9 +229,9 @@ void TestHWTopologyRead::Run(void) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(6) << std::left << tmp;
for (j = 0; j < num_devices; j++) {
if(i == j)
if (i == j) {
std::cout << std::setw(12) << std::left << "X";
else {
} else {
std::cout << std::setw(12) << std::left << gpu_links[i][j].hops;
}
}
@@ -240,9 +251,9 @@ void TestHWTopologyRead::Run(void) {
tmp = "GPU" + std::to_string(i);
std::cout << std::setw(6) << std::left << tmp;
for (j = 0; j < num_devices; j++) {
if(i == j)
if (i == j) {
std::cout << std::setw(12) << std::left << "X";
else {
} else {
std::cout << std::setw(12) << std::left << gpu_links[i][j].weight;
}
}
+78 -74
Просмотреть файл
@@ -107,11 +107,84 @@ void TestPerfCntrReadWrite::Close() {
#define RSMI_EVNT_ENUM_LAST(GRP_NAME) RSMI_EVNT_##GRP_NAME##_LAST
// Refactor this to handle different event groups once we have > 1 event group
void
TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) {
/**
 * @brief Create a counter for a single event, let it count for a fixed
 *        interval, read it once and destroy it.
 *
 * Along the way this also verifies that rsmi_dev_counter_create() rejects a
 * null handle pointer with RSMI_STATUS_INVALID_ARGS. The counter is read
 * while still running (it is not stopped first) and is then destroyed.
 * At STANDARD verbosity the value that was read is printed.
 *
 * @param dv_ind     index of the device whose event is counted
 * @param evnt       the event to count
 * @param val        [out] receives the result of rsmi_counter_read()
 * @param sleep_sec  number of seconds to sleep between starting the counter
 *                   and reading it
 */
void TestPerfCntrReadWrite::CountEvents(uint32_t dv_ind,
    rsmi_event_type_t evnt, rsmi_counter_value_t *val, int32_t sleep_sec) {
  rsmi_event_handle_t evt_handle;
  rsmi_status_t ret;

  ret = rsmi_dev_counter_create(dv_ind, evnt, &evt_handle);
  CHK_ERR_ASRT(ret)

  // Note that rsmi_dev_counter_create() should never return
  // RSMI_STATUS_NOT_SUPPORTED. It will return RSMI_STATUS_OUT_OF_RESOURCES
  // if it is unable to create a counter.
  ret = rsmi_dev_counter_create(dv_ind, evnt, nullptr);
  ASSERT_EQ(ret, RSMI_STATUS_INVALID_ARGS);

  ret = rsmi_counter_control(evt_handle, RSMI_CNTR_CMD_START, nullptr);
  CHK_ERR_ASRT(ret)

  sleep(sleep_sec);

  ret = rsmi_counter_read(evt_handle, val);
  CHK_ERR_ASRT(ret)

  IF_VERB(STANDARD) {
    std::cout << "\t\t\tValue: " << val->value << std::endl;
    std::cout << "\t\t\tTime Enabled: " << val->time_enabled << std::endl;
    std::cout << "\t\t\tTime Running: " << val->time_running << std::endl;

    // The utilization test in this file converts time_running to seconds by
    // dividing by 1e9, i.e. it is treated as nanoseconds. Apply the same
    // conversion here so the figure really is events per second.
    std::cout << "\t\t\tEvents/Second Running: ";
    if (val->time_running == 0) {
      // Avoid printing inf/nan when the counter never ran.
      std::cout << "N/A";
    } else {
      const double secs_running =
                              static_cast<double>(val->time_running) / 1e9;
      std::cout << static_cast<double>(val->value) / secs_running;
    }
    std::cout << std::endl;
  }

  ret = rsmi_dev_counter_destroy(evt_handle);
  CHK_ERR_ASRT(ret)
}
static const uint64_t kGig = 1000000000;
static const uint64_t kVg20Level1Bandwidth = 23; // 23 GB/sec
void
TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) {
rsmi_status_t ret;
rsmi_counter_value_t val;
uint64_t throughput;
auto utiliz = [&](rsmi_event_type_t evt, uint32_t chan) {
std::cout << "****************************" << std::endl;
std::cout << "Test XGMI Link Utilization (channel " <<
chan << ")" << std::endl;
std::cout << "****************************" << std::endl;
std::cout << "Assumed Level 1 Bandwidth: " <<
kVg20Level1Bandwidth << "GB/sec" << std::endl;
uint32_t tmp_verbosity = verbosity();
set_verbosity(0);
for (int i = 0; i < 5; ++i) {
std::cout << "\t\tPass " << i << ":" << std::endl;
CountEvents(dv_ind, evt, &val, 1);
double coll_time_sec = static_cast<float>(val.time_running)/kGig;
throughput = (val.value * 32)/coll_time_sec;
std::cout << "\t\t\tCollected events for " << coll_time_sec <<
" seconds" << std::endl;
std::cout << "\t\t\tEvents collected: " << val.value << std::endl;
std::cout << "\t\t\tXGMI throughput: " << throughput <<
" bytes/second" << std::endl;
std::cout << "\t\t\tXGMI Channel Utilization: " <<
100*throughput/ (float)(kVg20Level1Bandwidth*kGig) <<
"%" << std::endl;
std::cout << "\t\t\t****" << std::endl;
}
set_verbosity(tmp_verbosity);
};
utiliz(RSMI_EVNT_XGMI_1_BEATS_TX, 1);
utiliz(RSMI_EVNT_XGMI_0_BEATS_TX, 0);
std::cout << "****************************" << std::endl;
std::cout << "Test each event individually" << std::endl;
@@ -131,71 +204,11 @@ TestPerfCntrReadWrite::testEventsIndividually(uint32_t dv_ind) {
IF_VERB(STANDARD) {
std::cout << "\tTesting Event Type " << evnt << std::endl;
}
IF_VERB(STANDARD) {
std::cout << "\t\tCreating event..." << std::endl;
}
ret = rsmi_dev_counter_create(dv_ind,
static_cast<rsmi_event_type_t>(evnt), &evt_handle);
CHK_ERR_ASRT(ret)
// Note that rsmi_dev_counter_create() should never return
// RSMI_STATUS_NOT_SUPPORTED. It will return RSMI_STATUS_OUT_OF_RESOURCES
// if it is unable to create a counter.
ret = rsmi_dev_counter_create(dv_ind,
static_cast<rsmi_event_type_t>(evnt), nullptr);
ASSERT_EQ(ret, RSMI_STATUS_INVALID_ARGS);
IF_VERB(STANDARD) {
std::cout << "\t\tStart Counting..." << std::endl;
}
ret = rsmi_counter_control(evt_handle, RSMI_CNTR_CMD_START, nullptr);
CHK_ERR_ASRT(ret)
sleep(1);
IF_VERB(STANDARD) {
std::cout << "\t\tStop Counting..." << std::endl;
}
ret = rsmi_counter_control(evt_handle, RSMI_CNTR_CMD_STOP, nullptr);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t\tRead Counter..." << std::endl;
}
ret = rsmi_counter_read(evt_handle, &val);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t\tSuccessfully read value: " << std::endl;
std::cout << "\t\t\tValue: " << val.value << std::endl;
std::cout << "\t\t\tTime Enabled: " << val.time_enabled << std::endl;
std::cout << "\t\t\tTime Running: " << val.time_running << std::endl;
}
IF_VERB(STANDARD) {
std::cout << "\t\tRe-start Counting..." << std::endl;
}
ret = rsmi_counter_control(evt_handle, RSMI_CNTR_CMD_START, nullptr);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t\tRead free-running Counter..." << std::endl;
}
ret = rsmi_counter_read(evt_handle, &val);
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t\tSuccessfully read value: " << std::endl;
std::cout << "\t\t\tValue: " << val.value << std::endl;
std::cout << "\t\t\tTime Enabled: " << val.time_enabled << std::endl;
std::cout << "\t\t\tTime Running: " << val.time_running << std::endl;
}
ret = rsmi_dev_counter_destroy(evt_handle);
CHK_ERR_ASRT(ret)
CountEvents(dv_ind, static_cast<rsmi_event_type_t>(evnt), &val);
}
}
}
// Refactor this to handle different event groups once we have > 1 event group
void
TestPerfCntrReadWrite::testEventsSimultaneously(uint32_t dv_ind) {
rsmi_event_handle_t evt_handle[RSMI_EVNT_XGMI_LAST -
@@ -261,19 +274,10 @@ TestPerfCntrReadWrite::testEventsSimultaneously(uint32_t dv_ind) {
ret = rsmi_counter_available_counters_get(dv_ind, grp.group(),
&tmp_cntrs);
CHK_ERR_ASRT(ret)
ASSERT_EQ(tmp_cntrs, (avail_counters - j -1));
ASSERT_EQ(tmp_cntrs, (avail_counters - j - 1));
}
sleep(5);
IF_VERB(STANDARD) {
std::cout << "\tStop Counters..." << std::endl;
}
for (uint32_t j = 0; j < avail_counters; ++j) {
tmp = static_cast<rsmi_event_type_t>(evnt + j);
ret = rsmi_counter_control(evt_handle[tmp], RSMI_CNTR_CMD_STOP,
nullptr);
CHK_ERR_ASRT(ret)
}
sleep(1);
IF_VERB(STANDARD) {
std::cout << "\tRead Counters..." << std::endl;
+3
Просмотреть файл
@@ -72,6 +72,9 @@ class TestPerfCntrReadWrite : public TestBase {
virtual void DisplayTestInfo(void);
private:
void CountEvents(uint32_t dv_ind,
rsmi_event_type_t evnt, rsmi_counter_value_t *val,
int32_t sleep_sec = 1);
void testEventsIndividually(uint32_t dv_ind);
void testEventsSimultaneously(uint32_t dv_ind);
};