Rename ROCR -> Runtime and ROCP -> Profiler

Change-Id: If90953da8fa5d695b681813dad4a3e7ec26a9c7e
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
Αυτή η υποβολή περιλαμβάνεται σε:
Galantsev, Dmitrii
2024-02-22 02:40:58 -06:00
γονέας 796435c568
υποβολή 234b2d835b
16 αρχεία άλλαξαν με 295 προσθήκες και 212 διαγραφές
+4 -4
Προβολή Αρχείου
@@ -48,13 +48,13 @@ option(BUILD_STANDALONE "Build targets for rdci and rdcd" ON)
# which requires the ROCT-Thunk-Interface.
option(BUILD_RASLIB "Build targets for raslib" OFF)
# When cmake -DBUILD_ROCRTEST=off, it will not build the librdc_rocr.so
# When cmake -DBUILD_RUNTIME=off, it will not build the librdc_rocr.so
# which requires the Rocm run time.
option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON)
option(BUILD_RUNTIME "Build targets for librdc_rocr.so" ON)
# When cmake -DBUILD_ROCPTEST=off, it will not build the librdc_rocp.so
# When cmake -DBUILD_PROFILER=off, it will not build the librdc_rocp.so
# which requires the Rocm profiler.
option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" OFF)
option(BUILD_PROFILER "Build targets for librdc_rocp.so" OFF)
# When cmake -DBUILD_RVS=off, it will not build the librdc_rvs.so
# which requires the RocmValidationSuite
+2 -2
Προβολή Αρχείου
@@ -83,9 +83,9 @@ If only the RDC libraries are needed (i.e. only "embedded mode" is required), th
## Building RDC library without ROCM Run time (optional)
The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. To build in this way, -DBUILD_ROCRTEST=off should be passed on the the cmake command line:
The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. To build in this way, -DBUILD_RUNTIME=off should be passed on the the cmake command line:
cmake -B build -DBUILD_ROCRTEST=off
cmake -B build -DBUILD_RUNTIME=off
## Update System Library Path
@@ -1,7 +1,7 @@
# This module provides a rocmtools::rocmtools package
# This module provides a rocprofiler::rocprofiler package
# You can specify the ROCM directory by setting ROCM_DIR
set(NAME rocmtools)
set(NAME rocprofiler)
if(NOT DEFINED ROCM_DIR)
set(ROCM_DIR "/opt/rocm")
@@ -98,6 +98,13 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)
// ROCProfiler fields
// This doesn't map to rocprofiler counters directly
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
// See metrics.xml in rocprofiler
FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL, "", "PROF_GPU_UTIL", false)
FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "PROF_TA_BUSY_AVR", false)
// Events
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
FLD_DESC_ENT(RDC_EVNT_XGMI_0_REQ_TX, "Outgoing requests to neighbor 0", "XGMI_REQ_0", false)
+6
Προβολή Αρχείου
@@ -247,6 +247,12 @@ typedef enum {
RDC_FI_XGMI_6_WRITE_KB, //!< XGMI_6 accumulated data write size (KB)
RDC_FI_XGMI_7_WRITE_KB, //!< XGMI_7 accumulated data write size (KB)
/**
* @brief ROC-profiler related fields
*/
RDC_FI_PROF_GPU_UTIL = 800, //!<
RDC_FI_PROF_TA_BUSY_AVR, //!<
/*
* @brief Raw XGMI counter events
*/
@@ -36,7 +36,7 @@ THE SOFTWARE.
namespace amd {
namespace rdc {
class RdcEmbeddedHandler : public RdcHandler {
class RdcEmbeddedHandler final : public RdcHandler {
public:
// Job API
rdc_status_t rdc_job_start_stats(rdc_gpu_group_t groupId, const char job_id[64],
@@ -91,7 +91,7 @@ class RdcEmbeddedHandler : public RdcHandler {
rdc_status_t rdc_field_update_all(uint32_t wait_for_update) override;
explicit RdcEmbeddedHandler(rdc_operation_mode_t op_mode);
~RdcEmbeddedHandler();
~RdcEmbeddedHandler() final;
private:
rdc_status_t get_gpu_gauges(rdc_gpu_gauges_t* gpu_gauges);
@@ -67,7 +67,7 @@ struct MetricTask {
std::function<void(RdcMetricFetcherImpl&, RdcFieldKey)> task;
};
class RdcMetricFetcherImpl : public RdcMetricFetcher {
class RdcMetricFetcherImpl final : public RdcMetricFetcher {
public:
rdc_status_t fetch_smi_field(uint32_t gpu_index, rdc_field_t field_id,
rdc_field_value* value) override;
@@ -75,7 +75,7 @@ class RdcMetricFetcherImpl : public RdcMetricFetcher {
rdc_gpu_field_t* fields, uint32_t fields_count,
std::vector<rdc_gpu_field_value_t>& results) override; // NOLINT
RdcMetricFetcherImpl();
~RdcMetricFetcherImpl();
~RdcMetricFetcherImpl() final;
rdc_status_t acquire_smi_handle(RdcFieldKey fk) override;
rdc_status_t delete_smi_handle(RdcFieldKey fk) override;
@@ -31,12 +31,13 @@ THE SOFTWARE.
namespace amd {
namespace rdc {
class RdcMetricsUpdaterImpl : public RdcMetricsUpdater {
class RdcMetricsUpdaterImpl final : public RdcMetricsUpdater {
public:
void start() override;
void stop() override;
explicit RdcMetricsUpdaterImpl(const RdcWatchTablePtr& watch_table,
const uint32_t check_frequency);
~RdcMetricsUpdaterImpl() = default;
private:
RdcWatchTablePtr watch_table_;
@@ -39,43 +39,32 @@ class RdcRocpLib : public RdcTelemetry {
// get support field ids
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS],
uint32_t* field_count) override;
// Fetch
rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count,
rdc_field_value_f callback, void* user_data) override;
rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields_count) override;
rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields,
uint32_t fields_count) override;
RdcRocpLib();
~RdcRocpLib();
private:
RdcLibraryLoader lib_loader_;
rdc_status_t (*telemetry_fields_query_)(uint32_t field_ids[MAX_NUM_FIELDS],
uint32_t* field_count);
rdc_status_t (*telemetry_fields_value_get_)(rdc_gpu_field_t* fields, uint32_t fields_count,
rdc_field_value_f callback, void* user_data);
rdc_status_t (*telemetry_fields_watch_)(rdc_gpu_field_t* fields, uint32_t fields_count);
rdc_status_t (*telemetry_fields_unwatch_)(rdc_gpu_field_t* fields, uint32_t fields_count);
/**
* @brief Extract current ROCM_PATH from library or the environment
*/
std::string get_rocm_path();
/**
* @brief Set ROCMTOOLS_METRICS_PATH environment variable needed by
* librocmtools
* librocprofiler
*/
rdc_status_t set_rocmtools_path();
rdc_status_t set_rocprofiler_path();
};
using RdcRocpLibPtr = std::shared_ptr<RdcRocpLib>;
@@ -22,15 +22,17 @@ THE SOFTWARE.
#ifndef RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
#define RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
#include <rocmtools.h>
#include <rocprofiler/rocprofiler.h>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <list>
#include <map>
#include <string>
#include <typeinfo>
#include <unordered_map>
#include <vector>
#include "rdc/rdc.h"
@@ -43,33 +45,21 @@ namespace rdc {
* See metrics.xml in rocmtools for more info.
* RDC_CALC fields are calculated over time by RDC.
*/
static const std::unordered_map<rdc_field_t, const char*> counter_map_k = {
{RDC_FI_PROF_ELAPSED_CYCLES, "GRBM_COUNT"},
{RDC_FI_PROF_ACTIVE_WAVES, "SQ_WAVES"},
{RDC_FI_PROF_ACTIVE_CYCLES, "SQ_BUSY_CU_CYCLES"},
{RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"},
{RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"},
{RDC_FI_PROF_FETCH_SIZE, "FETCH_SIZE"},
{RDC_FI_PROF_WRITE_SIZE, "WRITE_SIZE"},
{RDC_FI_PROF_FLOPS_16, "TOTAL_16_OPS"},
{RDC_FI_PROF_FLOPS_32, "TOTAL_32_OPS"},
{RDC_FI_PROF_FLOPS_64, "TOTAL_64_OPS"},
// fields below require special handling
{RDC_FI_PROF_GFLOPS_16, "TOTAL_16_OPS"},
{RDC_FI_PROF_GFLOPS_32, "TOTAL_32_OPS"},
{RDC_FI_PROF_GFLOPS_64, "TOTAL_64_OPS"},
{RDC_FI_PROF_MEMR_BW_KBPNS, "FETCH_SIZE"},
{RDC_FI_PROF_MEMW_BW_KBPNS, "WRITE_SIZE"},
static const std::map<rdc_field_t, const char*> counter_map_k = {
{RDC_FI_PROF_GPU_UTIL, "GPU_UTIL"},
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
};
typedef struct {
hsa_agent_t* agents;
unsigned count;
unsigned capacity;
} hsa_agent_arr_t;
/// Common interface for RocP tests and samples
class RdcRocpBase {
static const int dev_count = 1;
typedef std::pair<uint32_t, rdc_field_t> pair_gpu_field_t;
typedef struct session_info_t {
rocmtools_session_id_t id{};
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> start_time;
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> stop_time;
} session_info_t;
public:
RdcRocpBase();
@@ -90,43 +80,21 @@ class RdcRocpBase {
*/
rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value);
/**
* @brief Destroy ROCmTools session responsible for monitoring a given
* field
*
* @details While rocmtools supports multiple fields per ID - it has a
* limit to how many counters it can query internally.
* To avoid concerning ourselves with said limit, we limit each session to
* 1 field.
* In the future this can be optimized to allow for multiple fields per
* session.
*
* @param[in] field A field to start monitoring
*
* @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed
* successfully.
*/
rdc_status_t create_session(pair_gpu_field_t gpu_field);
/**
* @brief Destroy ROCmTools session responsible for monitoring a given
* field
*
* @param[in] field A field to stop monitoring
*
* @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed
* successfully.
*/
rdc_status_t destroy_session(pair_gpu_field_t gpu_field);
protected:
private:
std::map<pair_gpu_field_t, session_info_t> sessions;
rocprofiler_t* contexts[dev_count] = {nullptr};
static const int features_count = 1;
std::map<const char*, double> metrics;
rocprofiler_feature_t features[dev_count][features_count];
void read_features(rocprofiler_t* context, const unsigned feature_count);
int run_profiler(const char* feature_name, hsa_queue_t** queues);
hsa_queue_t* queues[dev_count] = {nullptr};
hsa_agent_arr_t agent_arr;
/**
* @brief Convert from rocmtools status into RDC status
*/
rdc_status_t Rocp2RdcError(rocmtools_status_t rocm_status);
rdc_status_t Rocp2RdcError(hsa_status_t rocm_status);
};
} // namespace rdc
@@ -31,6 +31,7 @@ THE SOFTWARE.
#include "rdc_lib/impl/RdcDiagnosticModule.h"
#include "rdc_lib/impl/RdcRVSLib.h"
#include "rdc_lib/impl/RdcRasLib.h"
#include "rdc_lib/impl/RdcRocpLib.h"
#include "rdc_lib/impl/RdcRocrLib.h"
#include "rdc_lib/impl/RdcSmiLib.h"
#include "rdc_lib/impl/RdcTelemetryModule.h"
@@ -41,7 +42,7 @@ namespace rdc {
// pass shared_ptr instead of creating it
template <typename T>
rdc_status_t RdcModuleMgrImpl::insert_modules(std::shared_ptr<T> ptr) {
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetryModule, T>);
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetry, T>);
RDC_LOG(RDC_DEBUG, "Inserting module: " << typeid(T).name());
// same module can service multiple subsystems
// e.g. Diagnostics and Telemetry
@@ -57,7 +58,7 @@ rdc_status_t RdcModuleMgrImpl::insert_modules(std::shared_ptr<T> ptr) {
// base case
template <typename T>
rdc_status_t RdcModuleMgrImpl::insert_modules() {
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetryModule, T>);
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetry, T>);
try {
auto ptr = std::make_shared<T>();
return insert_modules(ptr);
@@ -88,7 +89,7 @@ RdcModuleMgrImpl::RdcModuleMgrImpl(const RdcMetricFetcherPtr& fetcher) : fetcher
}
// all other modules get initialized by insert_modules
insert_modules<RdcRasLib, RdcRVSLib, RdcRocrLib>();
insert_modules<RdcRasLib, RdcRVSLib, RdcRocrLib, RdcRocpLib>();
}
RdcTelemetryPtr RdcModuleMgrImpl::get_telemetry_module() {
@@ -38,16 +38,16 @@ RdcRocpLib::RdcRocpLib()
telemetry_fields_value_get_(nullptr),
telemetry_fields_watch_(nullptr),
telemetry_fields_unwatch_(nullptr) {
rdc_status_t status = lib_loader_.load("librdc_rocp.so");
rdc_status_t status = set_rocprofiler_path();
if (status != RDC_ST_OK) {
RDC_LOG(RDC_ERROR, "Rocp related function will not work.");
throw RdcException(RDC_ST_FAIL_LOAD_MODULE, "rocprofiler path could not be set");
return;
}
status = set_rocmtools_path();
status = lib_loader_.load("librdc_rocp.so");
if (status != RDC_ST_OK) {
RDC_LOG(RDC_ERROR, "Rocp related function will not work.");
throw RdcException(RDC_ST_FAIL_LOAD_MODULE, "rocmtools path could not be set");
return;
}
@@ -142,7 +142,7 @@ std::string RdcRocpLib::get_rocm_path() {
std::string line;
while (getline(file, line)) {
size_t index_end = line.find("librocmtools.so");
size_t index_end = line.find("librocprofiler64.so");
size_t index_start = index_end;
if (index_end == std::string::npos) {
// no library on this line
@@ -162,32 +162,38 @@ std::string RdcRocpLib::get_rocm_path() {
return rocm_path;
}
rdc_status_t RdcRocpLib::set_rocmtools_path() {
// librocmtools requires ROCMTOOLS_METRICS_PATH to be set
std::string rocmtools_metrics_path =
get_rocm_path() + "/libexec/rocmtools/counters/derived_counters.xml";
rdc_status_t RdcRocpLib::set_rocprofiler_path() {
// librocprofiler64 requires ROCPROFILER_METRICS_PATH to be set
std::string rocprofiler_metrics_path =
get_rocm_path() + "/libexec/rocprofiler/counters/derived_counters.xml";
// set rocm prefix
int result = setenv("ROCMTOOLS_METRICS_PATH", rocmtools_metrics_path.c_str(), 0);
int result = setenv("ROCPROFILER_METRICS_PATH", rocprofiler_metrics_path.c_str(), 0);
if (result != 0) {
RDC_LOG(RDC_ERROR, "setenv ROCMTOOLS_METRICS_PATH failed! " << result);
RDC_LOG(RDC_ERROR, "setenv ROCPROFILER_METRICS_PATH failed! " << result);
return RDC_ST_PERM_ERROR;
}
// check that env exists
const char* rocmtools_metrics_env = getenv("ROCMTOOLS_METRICS_PATH");
if (rocmtools_metrics_env == nullptr) {
RDC_LOG(RDC_ERROR, "ROCMTOOLS_METRICS_PATH is not set!");
const char* rocprofiler_metrics_env = getenv("ROCPROFILER_METRICS_PATH");
if (rocprofiler_metrics_env == nullptr) {
RDC_LOG(RDC_ERROR, "ROCPROFILER_METRICS_PATH is not set!");
return RDC_ST_NO_DATA;
}
// check that file can be accessed
std::ifstream test_file(rocmtools_metrics_env);
std::ifstream test_file(rocprofiler_metrics_env);
if (!test_file.good()) {
RDC_LOG(RDC_ERROR, "failed to open ROCMTOOLS_METRICS_PATH: " << rocmtools_metrics_env);
RDC_LOG(RDC_ERROR, "failed to open ROCPROFILER_METRICS_PATH: " << rocprofiler_metrics_env);
return RDC_ST_FILE_ERROR;
}
result = setenv("ROCP_METRICS", rocprofiler_metrics_path.c_str(), 0);
if (result != 0) {
RDC_LOG(RDC_ERROR, "setenv ROCP_METRICS failed! " << result);
return RDC_ST_PERM_ERROR;
}
return RDC_ST_OK;
}
@@ -17,19 +17,22 @@ set(RDC_ROCP_LIB_INC_LIST
"${RDC_LIB_INC_DIR}/RdcLogger.h"
"${INC_DIR}/RdcRocpBase.h")
if(BUILD_ROCPTEST)
if(BUILD_PROFILER)
message("Build librdc_rocp.so is enabled, make sure ROCmTools is installed.")
message("RDC_ROCP_LIB_INC_LIST=${RDC_ROCP_LIB_INC_LIST}")
set(ROCMTOOLS_LIB rocmtools::rocmtools)
# below provides rocmtools::rocmtools package
include(Findrocmtools)
set(ROCPROFILER_LIB rocprofiler::rocprofiler)
# below provides rocprofiler::rocprofiler package
include(Findrocprofiler)
set(HSA_LIB "hsa-runtime64")
find_package(hsa-runtime64
NAMES hsa-runtime64
HINTS ${ROCM_DIR}/lib/cmake
CONFIGURE REQUIRED)
set(RDC_LIB_MODULES ${RDC_LIB_MODULES} ${RDC_ROCP_LIB} PARENT_SCOPE)
add_library(${RDC_ROCP_LIB} SHARED ${RDC_ROCP_LIB_SRC_LIST} ${RDC_ROCP_LIB_INC_LIST})
target_link_libraries(${RDC_ROCP_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} ${HSA_LIB} ${ROCMTOOLS_LIB} pthread dl)
target_link_libraries(${RDC_ROCP_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} hsa-runtime64::hsa-runtime64 ${ROCPROFILER_LIB} pthread dl)
target_include_directories(${RDC_ROCP_LIB} PRIVATE
"${PROJECT_SOURCE_DIR}"
"${PROJECT_SOURCE_DIR}/include"
@@ -22,14 +22,13 @@ THE SOFTWARE.
#include "rdc_modules/rdc_rocp/RdcRocpBase.h"
#include <rocmtools.h>
#include <rocprofiler/rocprofiler.h>
#include <unistd.h>
#include <cassert>
#include <chrono>
#include <cstring>
#include <vector>
#include "hsa.h"
// #include "hsa.h"
#include "rdc/rdc.h"
#include "rdc_lib/RdcLogger.h"
#include "rdc_lib/rdc_common.h"
@@ -37,7 +36,162 @@ THE SOFTWARE.
namespace amd {
namespace rdc {
static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) {
hsa_device_type_t type;
hsa_agent_arr_t* agent_arr_ = (hsa_agent_arr_t*)agent_arr;
hsa_status_t hsa_errno = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
if (hsa_errno != HSA_STATUS_SUCCESS) {
return hsa_errno;
}
if (type == HSA_DEVICE_TYPE_GPU) {
if (agent_arr_->count >= agent_arr_->capacity) {
agent_arr_->capacity *= 2;
agent_arr_->agents =
(hsa_agent_t*)realloc(agent_arr_->agents, agent_arr_->capacity * sizeof(hsa_agent_t));
// realloc might set agents to nullptr upon failure
assert(agent_arr_->agents != nullptr);
}
agent_arr_->agents[agent_arr_->count] = agent;
++agent_arr_->count;
}
return HSA_STATUS_SUCCESS;
}
void RdcRocpBase::read_features(rocprofiler_t* context, const unsigned feature_count) {
hsa_status_t hsa_errno = rocprofiler_read(context, 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
hsa_errno = rocprofiler_get_data(context, 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
hsa_errno = rocprofiler_get_metrics(context);
assert(hsa_errno == HSA_STATUS_SUCCESS);
for (auto i = 0; i < feature_count; i++) {
switch (features[0][i].data.kind) {
case ROCPROFILER_DATA_KIND_DOUBLE:
metrics[features[0][i].name] = features[0][i].data.result_double;
break;
default:
RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << features[0][i].data.kind);
}
}
}
static int get_agents(hsa_agent_arr_t* agent_arr) {
int errcode = 0;
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
agent_arr->capacity = 1;
agent_arr->count = 0;
agent_arr->agents = (hsa_agent_t*)calloc(agent_arr->capacity, sizeof(hsa_agent_t));
assert(agent_arr->agents);
hsa_errno = hsa_iterate_agents(get_agent_handle_cb, agent_arr);
if (hsa_errno != HSA_STATUS_SUCCESS) {
errcode = -1;
agent_arr->capacity = 0;
agent_arr->count = 0;
free(agent_arr->agents);
}
return errcode;
}
bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
// create a single-producer queue
// TODO: check if API args are correct, especially UINT32_MAX
hsa_status_t status;
status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
UINT32_MAX, queue);
if (status != HSA_STATUS_SUCCESS) fprintf(stdout, "Queue creation failed");
// TODO: warning: is it really required!! ??
status = hsa_amd_queue_set_priority(*queue, HSA_AMD_QUEUE_PRIORITY_HIGH);
if (status != HSA_STATUS_SUCCESS) fprintf(stdout, "HSA Queue Priority Set Failed");
return (status == HSA_STATUS_SUCCESS);
}
int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) {
const char* events[features_count] = {feature_name};
// initialize hsa. hsa_init() will also load the profiler libs under the hood
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
for (int i = 0; i < dev_count; ++i) {
for (int j = 0; j < features_count; ++j) {
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
features[i][j].name = events[j];
}
}
rocprofiler_t* contexts[dev_count] = {0};
for (int i = 0; i < dev_count; ++i) {
rocprofiler_properties_t properties = {
queues[i],
64,
NULL,
NULL,
};
int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP);
hsa_errno = rocprofiler_open(agent_arr.agents[i], features[i], features_count, &contexts[i],
mode, &properties);
const char* error_string;
rocprofiler_error_string(&error_string);
if (error_string != NULL) {
fprintf(stdout, "%s", error_string);
fflush(stdout);
}
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_start(contexts[i], 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
// this is the duration for which the counter increments from zero.
usleep(10000);
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_stop(contexts[i], 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
for (int i = 0; i < dev_count; ++i) {
// printf("Iteration %d\n", loopcount++);
// fprintf(stdout, "------ Collecting Device[%d] -------\n", i);
read_features(contexts[i], features_count);
}
usleep(100);
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_close(contexts[i]);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
return 0;
}
RdcRocpBase::RdcRocpBase() {
// populate monitored fields
const std::map<rdc_field_t, const char*> counter_map_k = {
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
};
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
for (auto& [k, v] : counter_map_k) {
metrics[v] = 0.0;
}
assert(metrics.size() == counter_map_k.size());
printf("Metric size %d\n", (int)metrics.size());
for (auto& metric : metrics) {
printf("Metric: %s\n", metric.first);
}
hsa_status_t err = hsa_init();
if (err != HSA_STATUS_SUCCESS) {
const char* errstr = nullptr;
@@ -45,124 +199,72 @@ RdcRocpBase::RdcRocpBase() {
throw std::runtime_error("hsa error code: " + std::to_string(err) + " " + errstr);
}
auto status = rocmtools_initialize();
RDC_LOG(RDC_INFO, "rocmtools_initialize status: " << status);
// populate list of agents
int errcode = get_agents(&agent_arr);
if (errcode != 0) {
return;
}
printf("number of devices: %u\n", agent_arr.count);
printf("devices being profiled: %u\n", dev_count);
for (int i = 0; i < dev_count; ++i) {
int j = 0;
for (auto& metric : metrics) {
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
features[i][j].name = metric.first;
printf("Metric[%d]: %s\n", j, features[i][j].name);
j++;
}
}
for (int i = 0; i < dev_count; ++i) {
if (!createHsaQueue(&queues[i], agent_arr.agents[i])) {
fprintf(stdout, "can't create queues[%d]\n", i);
}
}
}
RdcRocpBase::~RdcRocpBase() {
for (auto& session : sessions) {
const rdc_status_t status = destroy_session(session.first);
assert(status == RDC_ST_OK);
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_stop(contexts[i], 0);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
sessions.clear();
auto status = rocmtools_finalize();
RDC_LOG(RDC_INFO, "rocmtools_finalize status: " << status);
hsa_status_t err = hsa_shut_down();
if (err != HSA_STATUS_SUCCESS) {
const char* errstr = nullptr;
hsa_status_string(err, &errstr);
// cannot throw an error here. print instead
RDC_LOG(RDC_ERROR, "hsa error code: " + std::to_string(err) + " " + errstr);
for (int i = 0; i < dev_count; ++i) {
hsa_errno = rocprofiler_close(contexts[i]);
assert(hsa_errno == HSA_STATUS_SUCCESS);
}
hsa_errno = hsa_shut_down();
assert(hsa_errno == HSA_STATUS_SUCCESS);
hsa_errno = hsa_shut_down();
assert(hsa_errno == HSA_STATUS_ERROR_NOT_INITIALIZED);
}
rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) {
if (sessions.empty()) {
return RDC_ST_NOT_FOUND;
}
if (value == nullptr) {
return RDC_ST_BAD_PARAMETER;
}
rocmtools_device_profile_metric_t counter;
session_info_t session = sessions.at(gpu_field);
const rocmtools_status_t status = rocmtools_device_profiling_session_poll(session.id, &counter);
session.stop_time = std::chrono::high_resolution_clock::now();
if (status != ROCMTOOLS_STATUS_SUCCESS) {
hsa_status_t status = HSA_STATUS_SUCCESS;
if (status != HSA_STATUS_SUCCESS) {
return Rocp2RdcError(status);
}
const auto elapsed =
std::chrono::duration_cast<std::chrono::nanoseconds>(session.stop_time - session.start_time)
.count();
// some metrics are derived from others and depend on time passed
switch (gpu_field.second) {
case RDC_FI_PROF_GFLOPS_16:
case RDC_FI_PROF_GFLOPS_32:
case RDC_FI_PROF_GFLOPS_64:
case RDC_FI_PROF_MEMR_BW_KBPNS:
case RDC_FI_PROF_MEMW_BW_KBPNS:
*value = counter.value.value / elapsed;
break;
default:
*value = counter.value.value;
run_profiler("TA_BUSY_avr", queues);
// read_features(contexts[gpu_field.first], features_count);
*value = metrics[counter_map_k.at(gpu_field.second)];
break;
}
return Rocp2RdcError(status);
}
rdc_status_t RdcRocpBase::create_session(pair_gpu_field_t gpu_field) {
if (sessions.count(gpu_field) != 0) {
RDC_LOG(RDC_DEBUG, "Session for field (" << gpu_field.second << ") on GPU [" << gpu_field.first
<< "] already exists!");
return RDC_ST_ALREADY_EXIST;
}
session_info_t session = {};
std::vector<const char*> rocmtools_fields = {counter_map_k.at(gpu_field.second)};
// create session
rocmtools_status_t status = rocmtools_device_profiling_session_create(
rocmtools_fields.data(), rocmtools_fields.size(), &session.id, 0, gpu_field.first);
if (status != ROCMTOOLS_STATUS_SUCCESS) {
return Rocp2RdcError(status);
}
// add start time
session.start_time = std::chrono::high_resolution_clock::now();
sessions.emplace(gpu_field, session);
// start session
status = rocmtools_device_profiling_session_start(session.id);
return Rocp2RdcError(status);
}
rdc_status_t RdcRocpBase::destroy_session(pair_gpu_field_t gpu_field) {
if (sessions.empty()) {
RDC_LOG(RDC_DEBUG, "Cannot destroy empty session...");
return RDC_ST_OK;
}
// no session with field
if (sessions.count(gpu_field) == 0) {
RDC_LOG(RDC_DEBUG, "Cannot destroy session with field (" << gpu_field.second << ") on GPU ["
<< gpu_field.first
<< "] because it doesn't exist...");
return RDC_ST_OK;
}
const rocmtools_session_id_t session_id = sessions.at(gpu_field).id;
const rocmtools_status_t status = rocmtools_device_profiling_session_destroy(session_id);
if (status == ROCMTOOLS_STATUS_SUCCESS) {
const auto num_of_destroyed_sessions = sessions.erase(gpu_field);
RDC_LOG(RDC_DEBUG, "destroyed (" << num_of_destroyed_sessions << ") sessions");
}
return Rocp2RdcError(status);
}
rdc_status_t RdcRocpBase::Rocp2RdcError(rocmtools_status_t rocm_status) {
rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t rocm_status) {
switch (rocm_status) {
case ROCMTOOLS_STATUS_SUCCESS:
case HSA_STATUS_SUCCESS:
return RDC_ST_OK;
case ROCMTOOLS_STATUS_ERROR_HAS_ACTIVE_SESSION:
return RDC_ST_ALREADY_EXIST;
case ROCMTOOLS_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH:
case ROCMTOOLS_STATUS_ERROR_SESSION_MISSING_FILTER:
case ROCMTOOLS_STATUS_ERROR_SESSION_NOT_FOUND:
return RDC_ST_BAD_PARAMETER;
default:
return RDC_ST_UNKNOWN_ERROR;
}
@@ -36,6 +36,8 @@ THE SOFTWARE.
amd::rdc::RdcRocpBase rocp;
rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; }
// get supported field ids
// TODO: Query fields with rocprofiler
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) {
@@ -54,7 +56,7 @@ rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint
}
// Fetch
rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count,
rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint32_t fields_count,
rdc_field_value_f callback, void* user_data) {
//
// Bulk fetch fields
@@ -69,7 +71,8 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fi
rdc_gpu_field_value_t values[BULK_FIELDS_MAX];
uint32_t bulk_count = 0;
rdc_status_t status = RDC_ST_UNKNOWN_ERROR;
double value = 0;
double data;
for (uint32_t i = 0; i < fields_count; i++) {
if (bulk_count >= BULK_FIELDS_MAX) {
status = callback(values, bulk_count, user_data);
@@ -80,14 +83,13 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fi
bulk_count = 0;
}
status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &value);
status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &data);
// get value
values[bulk_count].gpu_index = fields[i].gpu_index;
values[bulk_count].field_value.type = DOUBLE;
values[bulk_count].field_value.status = status;
values[bulk_count].field_value.ts = curTime;
values[bulk_count].field_value.value.dbl = value;
values[bulk_count].field_value.value.dbl = data;
values[bulk_count].field_value.field_id = fields[i].field_id;
bulk_count++;
}
@@ -106,8 +108,7 @@ rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields
rdc_status_t status = RDC_ST_OK;
for (uint32_t i = 0; i < fields_count; i++) {
RDC_LOG(RDC_DEBUG, "WATCH: " << fields[i].field_id);
const rdc_status_t temp_status =
rocp.create_session(std::make_pair(fields[i].gpu_index, fields[i].field_id));
const rdc_status_t temp_status = RDC_ST_OK;
if (temp_status != RDC_ST_OK) {
status = temp_status;
}
@@ -119,8 +120,7 @@ rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields, uint32_t fiel
rdc_status_t status = RDC_ST_OK;
for (uint32_t i = 0; i < fields_count; i++) {
RDC_LOG(RDC_DEBUG, "UNWATCH: " << fields[i].field_id);
const rdc_status_t temp_status =
rocp.destroy_session(std::make_pair(fields[i].gpu_index, fields[i].field_id));
const rdc_status_t temp_status = RDC_ST_OK;
// return last non-ok status
if (temp_status != RDC_ST_OK) {
status = temp_status;
@@ -29,7 +29,7 @@ set(RDC_ROCR_LIB_INC_LIST
"${RDC_LIB_INC_DIR}/RdcDiagnosticLibInterface.h"
"${RDC_LIB_INC_DIR}/rdc_common.h")
if(BUILD_ROCRTEST)
if(BUILD_RUNTIME)
message("Build librdc_rocr.so is enabled, make sure the Rocm run time is installed.")
message("RDC_ROCR_LIB_INC_LIST=${RDC_ROCR_LIB_INC_LIST}")