Add rocprofiler plugin
Rename ROCR -> Runtime and ROCP -> Profiler Change-Id: If90953da8fa5d695b681813dad4a3e7ec26a9c7e Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
Αυτή η υποβολή περιλαμβάνεται σε:
@@ -48,13 +48,13 @@ option(BUILD_STANDALONE "Build targets for rdci and rdcd" ON)
|
||||
# which requires the ROCT-Thunk-Interface.
|
||||
option(BUILD_RASLIB "Build targets for raslib" OFF)
|
||||
|
||||
# When cmake -DBUILD_ROCRTEST=off, it will not build the librdc_rocr.so
|
||||
# When cmake -DBUILD_RUNTIME=off, it will not build the librdc_rocr.so
|
||||
# which requires the Rocm run time.
|
||||
option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON)
|
||||
option(BUILD_RUNTIME "Build targets for librdc_rocr.so" ON)
|
||||
|
||||
# When cmake -DBUILD_ROCPTEST=off, it will not build the librdc_rocp.so
|
||||
# When cmake -DBUILD_PROFILER=off, it will not build the librdc_rocp.so
|
||||
# which requires the Rocm profiler.
|
||||
option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" OFF)
|
||||
option(BUILD_PROFILER "Build targets for librdc_rocp.so" OFF)
|
||||
|
||||
# When cmake -DBUILD_RVS=off, it will not build the librdc_rvs.so
|
||||
# which requires the RocmValidationSuite
|
||||
|
||||
+2
-2
@@ -83,9 +83,9 @@ If only the RDC libraries are needed (i.e. only "embedded mode" is required), th
|
||||
|
||||
## Building RDC library without ROCM Run time (optional)
|
||||
|
||||
The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. To build in this way, -DBUILD_ROCRTEST=off should be passed on the the cmake command line:
|
||||
The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. To build in this way, -DBUILD_RUNTIME=off should be passed on the the cmake command line:
|
||||
|
||||
cmake -B build -DBUILD_ROCRTEST=off
|
||||
cmake -B build -DBUILD_RUNTIME=off
|
||||
|
||||
## Update System Library Path
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# This module provides a rocmtools::rocmtools package
|
||||
# This module provides a rocprofiler::rocprofiler package
|
||||
# You can specify the ROCM directory by setting ROCM_DIR
|
||||
|
||||
set(NAME rocmtools)
|
||||
set(NAME rocprofiler)
|
||||
|
||||
if(NOT DEFINED ROCM_DIR)
|
||||
set(ROCM_DIR "/opt/rocm")
|
||||
@@ -98,6 +98,13 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB)
|
||||
|
||||
|
||||
|
||||
// ROCProfiler fields
|
||||
// This doesn't map to rocprofiler counters directly
|
||||
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
|
||||
// See metrics.xml in rocprofiler
|
||||
FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL, "", "PROF_GPU_UTIL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "PROF_TA_BUSY_AVR", false)
|
||||
|
||||
// Events
|
||||
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
|
||||
FLD_DESC_ENT(RDC_EVNT_XGMI_0_REQ_TX, "Outgoing requests to neighbor 0", "XGMI_REQ_0", false)
|
||||
|
||||
@@ -247,6 +247,12 @@ typedef enum {
|
||||
RDC_FI_XGMI_6_WRITE_KB, //!< XGMI_6 accumulated data write size (KB)
|
||||
RDC_FI_XGMI_7_WRITE_KB, //!< XGMI_7 accumulated data write size (KB)
|
||||
|
||||
/**
|
||||
* @brief ROC-profiler related fields
|
||||
*/
|
||||
RDC_FI_PROF_GPU_UTIL = 800, //!<
|
||||
RDC_FI_PROF_TA_BUSY_AVR, //!<
|
||||
|
||||
/*
|
||||
* @brief Raw XGMI counter events
|
||||
*/
|
||||
|
||||
@@ -36,7 +36,7 @@ THE SOFTWARE.
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class RdcEmbeddedHandler : public RdcHandler {
|
||||
class RdcEmbeddedHandler final : public RdcHandler {
|
||||
public:
|
||||
// Job API
|
||||
rdc_status_t rdc_job_start_stats(rdc_gpu_group_t groupId, const char job_id[64],
|
||||
@@ -91,7 +91,7 @@ class RdcEmbeddedHandler : public RdcHandler {
|
||||
rdc_status_t rdc_field_update_all(uint32_t wait_for_update) override;
|
||||
|
||||
explicit RdcEmbeddedHandler(rdc_operation_mode_t op_mode);
|
||||
~RdcEmbeddedHandler();
|
||||
~RdcEmbeddedHandler() final;
|
||||
|
||||
private:
|
||||
rdc_status_t get_gpu_gauges(rdc_gpu_gauges_t* gpu_gauges);
|
||||
|
||||
@@ -67,7 +67,7 @@ struct MetricTask {
|
||||
std::function<void(RdcMetricFetcherImpl&, RdcFieldKey)> task;
|
||||
};
|
||||
|
||||
class RdcMetricFetcherImpl : public RdcMetricFetcher {
|
||||
class RdcMetricFetcherImpl final : public RdcMetricFetcher {
|
||||
public:
|
||||
rdc_status_t fetch_smi_field(uint32_t gpu_index, rdc_field_t field_id,
|
||||
rdc_field_value* value) override;
|
||||
@@ -75,7 +75,7 @@ class RdcMetricFetcherImpl : public RdcMetricFetcher {
|
||||
rdc_gpu_field_t* fields, uint32_t fields_count,
|
||||
std::vector<rdc_gpu_field_value_t>& results) override; // NOLINT
|
||||
RdcMetricFetcherImpl();
|
||||
~RdcMetricFetcherImpl();
|
||||
~RdcMetricFetcherImpl() final;
|
||||
|
||||
rdc_status_t acquire_smi_handle(RdcFieldKey fk) override;
|
||||
rdc_status_t delete_smi_handle(RdcFieldKey fk) override;
|
||||
|
||||
@@ -31,12 +31,13 @@ THE SOFTWARE.
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class RdcMetricsUpdaterImpl : public RdcMetricsUpdater {
|
||||
class RdcMetricsUpdaterImpl final : public RdcMetricsUpdater {
|
||||
public:
|
||||
void start() override;
|
||||
void stop() override;
|
||||
explicit RdcMetricsUpdaterImpl(const RdcWatchTablePtr& watch_table,
|
||||
const uint32_t check_frequency);
|
||||
~RdcMetricsUpdaterImpl() = default;
|
||||
|
||||
private:
|
||||
RdcWatchTablePtr watch_table_;
|
||||
|
||||
@@ -39,43 +39,32 @@ class RdcRocpLib : public RdcTelemetry {
|
||||
// get support field ids
|
||||
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS],
|
||||
uint32_t* field_count) override;
|
||||
|
||||
// Fetch
|
||||
rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count,
|
||||
rdc_field_value_f callback, void* user_data) override;
|
||||
|
||||
rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields_count) override;
|
||||
|
||||
rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields,
|
||||
uint32_t fields_count) override;
|
||||
|
||||
RdcRocpLib();
|
||||
|
||||
~RdcRocpLib();
|
||||
|
||||
private:
|
||||
RdcLibraryLoader lib_loader_;
|
||||
|
||||
rdc_status_t (*telemetry_fields_query_)(uint32_t field_ids[MAX_NUM_FIELDS],
|
||||
uint32_t* field_count);
|
||||
|
||||
rdc_status_t (*telemetry_fields_value_get_)(rdc_gpu_field_t* fields, uint32_t fields_count,
|
||||
rdc_field_value_f callback, void* user_data);
|
||||
|
||||
rdc_status_t (*telemetry_fields_watch_)(rdc_gpu_field_t* fields, uint32_t fields_count);
|
||||
|
||||
rdc_status_t (*telemetry_fields_unwatch_)(rdc_gpu_field_t* fields, uint32_t fields_count);
|
||||
|
||||
/**
|
||||
* @brief Extract current ROCM_PATH from library or the environment
|
||||
*/
|
||||
std::string get_rocm_path();
|
||||
|
||||
/**
|
||||
* @brief Set ROCMTOOLS_METRICS_PATH environment variable needed by
|
||||
* librocmtools
|
||||
* librocprofiler
|
||||
*/
|
||||
rdc_status_t set_rocmtools_path();
|
||||
rdc_status_t set_rocprofiler_path();
|
||||
};
|
||||
|
||||
using RdcRocpLibPtr = std::shared_ptr<RdcRocpLib>;
|
||||
|
||||
@@ -22,15 +22,17 @@ THE SOFTWARE.
|
||||
|
||||
#ifndef RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
|
||||
#define RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_
|
||||
#include <rocmtools.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <typeinfo>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "rdc/rdc.h"
|
||||
|
||||
@@ -43,33 +45,21 @@ namespace rdc {
|
||||
* See metrics.xml in rocmtools for more info.
|
||||
* RDC_CALC fields are calculated over time by RDC.
|
||||
*/
|
||||
static const std::unordered_map<rdc_field_t, const char*> counter_map_k = {
|
||||
{RDC_FI_PROF_ELAPSED_CYCLES, "GRBM_COUNT"},
|
||||
{RDC_FI_PROF_ACTIVE_WAVES, "SQ_WAVES"},
|
||||
{RDC_FI_PROF_ACTIVE_CYCLES, "SQ_BUSY_CU_CYCLES"},
|
||||
{RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"},
|
||||
{RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"},
|
||||
{RDC_FI_PROF_FETCH_SIZE, "FETCH_SIZE"},
|
||||
{RDC_FI_PROF_WRITE_SIZE, "WRITE_SIZE"},
|
||||
{RDC_FI_PROF_FLOPS_16, "TOTAL_16_OPS"},
|
||||
{RDC_FI_PROF_FLOPS_32, "TOTAL_32_OPS"},
|
||||
{RDC_FI_PROF_FLOPS_64, "TOTAL_64_OPS"},
|
||||
// fields below require special handling
|
||||
{RDC_FI_PROF_GFLOPS_16, "TOTAL_16_OPS"},
|
||||
{RDC_FI_PROF_GFLOPS_32, "TOTAL_32_OPS"},
|
||||
{RDC_FI_PROF_GFLOPS_64, "TOTAL_64_OPS"},
|
||||
{RDC_FI_PROF_MEMR_BW_KBPNS, "FETCH_SIZE"},
|
||||
{RDC_FI_PROF_MEMW_BW_KBPNS, "WRITE_SIZE"},
|
||||
static const std::map<rdc_field_t, const char*> counter_map_k = {
|
||||
{RDC_FI_PROF_GPU_UTIL, "GPU_UTIL"},
|
||||
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
hsa_agent_t* agents;
|
||||
unsigned count;
|
||||
unsigned capacity;
|
||||
} hsa_agent_arr_t;
|
||||
|
||||
/// Common interface for RocP tests and samples
|
||||
class RdcRocpBase {
|
||||
static const int dev_count = 1;
|
||||
typedef std::pair<uint32_t, rdc_field_t> pair_gpu_field_t;
|
||||
typedef struct session_info_t {
|
||||
rocmtools_session_id_t id{};
|
||||
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> start_time;
|
||||
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds> stop_time;
|
||||
} session_info_t;
|
||||
|
||||
public:
|
||||
RdcRocpBase();
|
||||
@@ -90,43 +80,21 @@ class RdcRocpBase {
|
||||
*/
|
||||
rdc_status_t rocp_lookup(pair_gpu_field_t gpu_field, double* value);
|
||||
|
||||
/**
|
||||
* @brief Destroy ROCmTools session responsible for monitoring a given
|
||||
* field
|
||||
*
|
||||
* @details While rocmtools supports multiple fields per ID - it has a
|
||||
* limit to how many counters it can query internally.
|
||||
* To avoid concerning ourselves with said limit, we limit each session to
|
||||
* 1 field.
|
||||
* In the future this can be optimized to allow for multiple fields per
|
||||
* session.
|
||||
*
|
||||
* @param[in] field A field to start monitoring
|
||||
*
|
||||
* @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
rdc_status_t create_session(pair_gpu_field_t gpu_field);
|
||||
|
||||
/**
|
||||
* @brief Destroy ROCmTools session responsible for monitoring a given
|
||||
* field
|
||||
*
|
||||
* @param[in] field A field to stop monitoring
|
||||
*
|
||||
* @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
rdc_status_t destroy_session(pair_gpu_field_t gpu_field);
|
||||
|
||||
protected:
|
||||
private:
|
||||
std::map<pair_gpu_field_t, session_info_t> sessions;
|
||||
rocprofiler_t* contexts[dev_count] = {nullptr};
|
||||
static const int features_count = 1;
|
||||
std::map<const char*, double> metrics;
|
||||
rocprofiler_feature_t features[dev_count][features_count];
|
||||
void read_features(rocprofiler_t* context, const unsigned feature_count);
|
||||
int run_profiler(const char* feature_name, hsa_queue_t** queues);
|
||||
hsa_queue_t* queues[dev_count] = {nullptr};
|
||||
hsa_agent_arr_t agent_arr;
|
||||
|
||||
/**
|
||||
* @brief Convert from rocmtools status into RDC status
|
||||
*/
|
||||
rdc_status_t Rocp2RdcError(rocmtools_status_t rocm_status);
|
||||
rdc_status_t Rocp2RdcError(hsa_status_t rocm_status);
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
|
||||
@@ -31,6 +31,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcDiagnosticModule.h"
|
||||
#include "rdc_lib/impl/RdcRVSLib.h"
|
||||
#include "rdc_lib/impl/RdcRasLib.h"
|
||||
#include "rdc_lib/impl/RdcRocpLib.h"
|
||||
#include "rdc_lib/impl/RdcRocrLib.h"
|
||||
#include "rdc_lib/impl/RdcSmiLib.h"
|
||||
#include "rdc_lib/impl/RdcTelemetryModule.h"
|
||||
@@ -41,7 +42,7 @@ namespace rdc {
|
||||
// pass shared_ptr instead of creating it
|
||||
template <typename T>
|
||||
rdc_status_t RdcModuleMgrImpl::insert_modules(std::shared_ptr<T> ptr) {
|
||||
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetryModule, T>);
|
||||
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetry, T>);
|
||||
RDC_LOG(RDC_DEBUG, "Inserting module: " << typeid(T).name());
|
||||
// same module can service multiple subsystems
|
||||
// e.g. Diagnostics and Telemetry
|
||||
@@ -57,7 +58,7 @@ rdc_status_t RdcModuleMgrImpl::insert_modules(std::shared_ptr<T> ptr) {
|
||||
// base case
|
||||
template <typename T>
|
||||
rdc_status_t RdcModuleMgrImpl::insert_modules() {
|
||||
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetryModule, T>);
|
||||
static_assert(std::is_base_of_v<RdcDiagnostic, T> || std::is_base_of_v<RdcTelemetry, T>);
|
||||
try {
|
||||
auto ptr = std::make_shared<T>();
|
||||
return insert_modules(ptr);
|
||||
@@ -88,7 +89,7 @@ RdcModuleMgrImpl::RdcModuleMgrImpl(const RdcMetricFetcherPtr& fetcher) : fetcher
|
||||
}
|
||||
|
||||
// all other modules get initialized by insert_modules
|
||||
insert_modules<RdcRasLib, RdcRVSLib, RdcRocrLib>();
|
||||
insert_modules<RdcRasLib, RdcRVSLib, RdcRocrLib, RdcRocpLib>();
|
||||
}
|
||||
|
||||
RdcTelemetryPtr RdcModuleMgrImpl::get_telemetry_module() {
|
||||
|
||||
@@ -38,16 +38,16 @@ RdcRocpLib::RdcRocpLib()
|
||||
telemetry_fields_value_get_(nullptr),
|
||||
telemetry_fields_watch_(nullptr),
|
||||
telemetry_fields_unwatch_(nullptr) {
|
||||
rdc_status_t status = lib_loader_.load("librdc_rocp.so");
|
||||
rdc_status_t status = set_rocprofiler_path();
|
||||
if (status != RDC_ST_OK) {
|
||||
RDC_LOG(RDC_ERROR, "Rocp related function will not work.");
|
||||
throw RdcException(RDC_ST_FAIL_LOAD_MODULE, "rocprofiler path could not be set");
|
||||
return;
|
||||
}
|
||||
|
||||
status = set_rocmtools_path();
|
||||
status = lib_loader_.load("librdc_rocp.so");
|
||||
if (status != RDC_ST_OK) {
|
||||
RDC_LOG(RDC_ERROR, "Rocp related function will not work.");
|
||||
throw RdcException(RDC_ST_FAIL_LOAD_MODULE, "rocmtools path could not be set");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -142,7 +142,7 @@ std::string RdcRocpLib::get_rocm_path() {
|
||||
|
||||
std::string line;
|
||||
while (getline(file, line)) {
|
||||
size_t index_end = line.find("librocmtools.so");
|
||||
size_t index_end = line.find("librocprofiler64.so");
|
||||
size_t index_start = index_end;
|
||||
if (index_end == std::string::npos) {
|
||||
// no library on this line
|
||||
@@ -162,32 +162,38 @@ std::string RdcRocpLib::get_rocm_path() {
|
||||
return rocm_path;
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocpLib::set_rocmtools_path() {
|
||||
// librocmtools requires ROCMTOOLS_METRICS_PATH to be set
|
||||
std::string rocmtools_metrics_path =
|
||||
get_rocm_path() + "/libexec/rocmtools/counters/derived_counters.xml";
|
||||
rdc_status_t RdcRocpLib::set_rocprofiler_path() {
|
||||
// librocprofiler64 requires ROCPROFILER_METRICS_PATH to be set
|
||||
std::string rocprofiler_metrics_path =
|
||||
get_rocm_path() + "/libexec/rocprofiler/counters/derived_counters.xml";
|
||||
|
||||
// set rocm prefix
|
||||
int result = setenv("ROCMTOOLS_METRICS_PATH", rocmtools_metrics_path.c_str(), 0);
|
||||
int result = setenv("ROCPROFILER_METRICS_PATH", rocprofiler_metrics_path.c_str(), 0);
|
||||
if (result != 0) {
|
||||
RDC_LOG(RDC_ERROR, "setenv ROCMTOOLS_METRICS_PATH failed! " << result);
|
||||
RDC_LOG(RDC_ERROR, "setenv ROCPROFILER_METRICS_PATH failed! " << result);
|
||||
return RDC_ST_PERM_ERROR;
|
||||
}
|
||||
|
||||
// check that env exists
|
||||
const char* rocmtools_metrics_env = getenv("ROCMTOOLS_METRICS_PATH");
|
||||
if (rocmtools_metrics_env == nullptr) {
|
||||
RDC_LOG(RDC_ERROR, "ROCMTOOLS_METRICS_PATH is not set!");
|
||||
const char* rocprofiler_metrics_env = getenv("ROCPROFILER_METRICS_PATH");
|
||||
if (rocprofiler_metrics_env == nullptr) {
|
||||
RDC_LOG(RDC_ERROR, "ROCPROFILER_METRICS_PATH is not set!");
|
||||
return RDC_ST_NO_DATA;
|
||||
}
|
||||
|
||||
// check that file can be accessed
|
||||
std::ifstream test_file(rocmtools_metrics_env);
|
||||
std::ifstream test_file(rocprofiler_metrics_env);
|
||||
if (!test_file.good()) {
|
||||
RDC_LOG(RDC_ERROR, "failed to open ROCMTOOLS_METRICS_PATH: " << rocmtools_metrics_env);
|
||||
RDC_LOG(RDC_ERROR, "failed to open ROCPROFILER_METRICS_PATH: " << rocprofiler_metrics_env);
|
||||
return RDC_ST_FILE_ERROR;
|
||||
}
|
||||
|
||||
result = setenv("ROCP_METRICS", rocprofiler_metrics_path.c_str(), 0);
|
||||
if (result != 0) {
|
||||
RDC_LOG(RDC_ERROR, "setenv ROCP_METRICS failed! " << result);
|
||||
return RDC_ST_PERM_ERROR;
|
||||
}
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,19 +17,22 @@ set(RDC_ROCP_LIB_INC_LIST
|
||||
"${RDC_LIB_INC_DIR}/RdcLogger.h"
|
||||
"${INC_DIR}/RdcRocpBase.h")
|
||||
|
||||
if(BUILD_ROCPTEST)
|
||||
if(BUILD_PROFILER)
|
||||
message("Build librdc_rocp.so is enabled, make sure ROCmTools is installed.")
|
||||
|
||||
message("RDC_ROCP_LIB_INC_LIST=${RDC_ROCP_LIB_INC_LIST}")
|
||||
|
||||
set(ROCMTOOLS_LIB rocmtools::rocmtools)
|
||||
# below provides rocmtools::rocmtools package
|
||||
include(Findrocmtools)
|
||||
set(ROCPROFILER_LIB rocprofiler::rocprofiler)
|
||||
# below provides rocprofiler::rocprofiler package
|
||||
include(Findrocprofiler)
|
||||
|
||||
set(HSA_LIB "hsa-runtime64")
|
||||
find_package(hsa-runtime64
|
||||
NAMES hsa-runtime64
|
||||
HINTS ${ROCM_DIR}/lib/cmake
|
||||
CONFIGURE REQUIRED)
|
||||
set(RDC_LIB_MODULES ${RDC_LIB_MODULES} ${RDC_ROCP_LIB} PARENT_SCOPE)
|
||||
add_library(${RDC_ROCP_LIB} SHARED ${RDC_ROCP_LIB_SRC_LIST} ${RDC_ROCP_LIB_INC_LIST})
|
||||
target_link_libraries(${RDC_ROCP_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} ${HSA_LIB} ${ROCMTOOLS_LIB} pthread dl)
|
||||
target_link_libraries(${RDC_ROCP_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} hsa-runtime64::hsa-runtime64 ${ROCPROFILER_LIB} pthread dl)
|
||||
target_include_directories(${RDC_ROCP_LIB} PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${PROJECT_SOURCE_DIR}/include"
|
||||
|
||||
@@ -22,14 +22,13 @@ THE SOFTWARE.
|
||||
|
||||
#include "rdc_modules/rdc_rocp/RdcRocpBase.h"
|
||||
|
||||
#include <rocmtools.h>
|
||||
#include <rocprofiler/rocprofiler.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "hsa.h"
|
||||
// #include "hsa.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
@@ -37,7 +36,162 @@ THE SOFTWARE.
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) {
|
||||
hsa_device_type_t type;
|
||||
hsa_agent_arr_t* agent_arr_ = (hsa_agent_arr_t*)agent_arr;
|
||||
|
||||
hsa_status_t hsa_errno = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
if (hsa_errno != HSA_STATUS_SUCCESS) {
|
||||
return hsa_errno;
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_GPU) {
|
||||
if (agent_arr_->count >= agent_arr_->capacity) {
|
||||
agent_arr_->capacity *= 2;
|
||||
agent_arr_->agents =
|
||||
(hsa_agent_t*)realloc(agent_arr_->agents, agent_arr_->capacity * sizeof(hsa_agent_t));
|
||||
// realloc might set agents to nullptr upon failure
|
||||
assert(agent_arr_->agents != nullptr);
|
||||
}
|
||||
agent_arr_->agents[agent_arr_->count] = agent;
|
||||
++agent_arr_->count;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void RdcRocpBase::read_features(rocprofiler_t* context, const unsigned feature_count) {
|
||||
hsa_status_t hsa_errno = rocprofiler_read(context, 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
hsa_errno = rocprofiler_get_data(context, 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
hsa_errno = rocprofiler_get_metrics(context);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
for (auto i = 0; i < feature_count; i++) {
|
||||
switch (features[0][i].data.kind) {
|
||||
case ROCPROFILER_DATA_KIND_DOUBLE:
|
||||
metrics[features[0][i].name] = features[0][i].data.result_double;
|
||||
break;
|
||||
default:
|
||||
RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << features[0][i].data.kind);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int get_agents(hsa_agent_arr_t* agent_arr) {
|
||||
int errcode = 0;
|
||||
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
|
||||
|
||||
agent_arr->capacity = 1;
|
||||
agent_arr->count = 0;
|
||||
agent_arr->agents = (hsa_agent_t*)calloc(agent_arr->capacity, sizeof(hsa_agent_t));
|
||||
assert(agent_arr->agents);
|
||||
|
||||
hsa_errno = hsa_iterate_agents(get_agent_handle_cb, agent_arr);
|
||||
if (hsa_errno != HSA_STATUS_SUCCESS) {
|
||||
errcode = -1;
|
||||
|
||||
agent_arr->capacity = 0;
|
||||
agent_arr->count = 0;
|
||||
free(agent_arr->agents);
|
||||
}
|
||||
|
||||
return errcode;
|
||||
}
|
||||
|
||||
bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) {
|
||||
// create a single-producer queue
|
||||
// TODO: check if API args are correct, especially UINT32_MAX
|
||||
hsa_status_t status;
|
||||
status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX,
|
||||
UINT32_MAX, queue);
|
||||
if (status != HSA_STATUS_SUCCESS) fprintf(stdout, "Queue creation failed");
|
||||
|
||||
// TODO: warning: is it really required!! ??
|
||||
status = hsa_amd_queue_set_priority(*queue, HSA_AMD_QUEUE_PRIORITY_HIGH);
|
||||
if (status != HSA_STATUS_SUCCESS) fprintf(stdout, "HSA Queue Priority Set Failed");
|
||||
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) {
|
||||
const char* events[features_count] = {feature_name};
|
||||
|
||||
// initialize hsa. hsa_init() will also load the profiler libs under the hood
|
||||
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
for (int j = 0; j < features_count; ++j) {
|
||||
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[i][j].name = events[j];
|
||||
}
|
||||
}
|
||||
|
||||
rocprofiler_t* contexts[dev_count] = {0};
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
rocprofiler_properties_t properties = {
|
||||
queues[i],
|
||||
64,
|
||||
NULL,
|
||||
NULL,
|
||||
};
|
||||
int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP);
|
||||
hsa_errno = rocprofiler_open(agent_arr.agents[i], features[i], features_count, &contexts[i],
|
||||
mode, &properties);
|
||||
const char* error_string;
|
||||
rocprofiler_error_string(&error_string);
|
||||
if (error_string != NULL) {
|
||||
fprintf(stdout, "%s", error_string);
|
||||
fflush(stdout);
|
||||
}
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_start(contexts[i], 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
// this is the duration for which the counter increments from zero.
|
||||
usleep(10000);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_stop(contexts[i], 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
// printf("Iteration %d\n", loopcount++);
|
||||
// fprintf(stdout, "------ Collecting Device[%d] -------\n", i);
|
||||
read_features(contexts[i], features_count);
|
||||
}
|
||||
|
||||
usleep(100);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_close(contexts[i]);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
RdcRocpBase::RdcRocpBase() {
|
||||
// populate monitored fields
|
||||
const std::map<rdc_field_t, const char*> counter_map_k = {
|
||||
{RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"},
|
||||
};
|
||||
std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n";
|
||||
for (auto& [k, v] : counter_map_k) {
|
||||
metrics[v] = 0.0;
|
||||
}
|
||||
assert(metrics.size() == counter_map_k.size());
|
||||
|
||||
printf("Metric size %d\n", (int)metrics.size());
|
||||
for (auto& metric : metrics) {
|
||||
printf("Metric: %s\n", metric.first);
|
||||
}
|
||||
|
||||
hsa_status_t err = hsa_init();
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
const char* errstr = nullptr;
|
||||
@@ -45,124 +199,72 @@ RdcRocpBase::RdcRocpBase() {
|
||||
throw std::runtime_error("hsa error code: " + std::to_string(err) + " " + errstr);
|
||||
}
|
||||
|
||||
auto status = rocmtools_initialize();
|
||||
RDC_LOG(RDC_INFO, "rocmtools_initialize status: " << status);
|
||||
// populate list of agents
|
||||
int errcode = get_agents(&agent_arr);
|
||||
if (errcode != 0) {
|
||||
return;
|
||||
}
|
||||
printf("number of devices: %u\n", agent_arr.count);
|
||||
printf("devices being profiled: %u\n", dev_count);
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
int j = 0;
|
||||
for (auto& metric : metrics) {
|
||||
features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC;
|
||||
features[i][j].name = metric.first;
|
||||
printf("Metric[%d]: %s\n", j, features[i][j].name);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
if (!createHsaQueue(&queues[i], agent_arr.agents[i])) {
|
||||
fprintf(stdout, "can't create queues[%d]\n", i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RdcRocpBase::~RdcRocpBase() {
|
||||
for (auto& session : sessions) {
|
||||
const rdc_status_t status = destroy_session(session.first);
|
||||
assert(status == RDC_ST_OK);
|
||||
hsa_status_t hsa_errno = HSA_STATUS_SUCCESS;
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_stop(contexts[i], 0);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
sessions.clear();
|
||||
auto status = rocmtools_finalize();
|
||||
RDC_LOG(RDC_INFO, "rocmtools_finalize status: " << status);
|
||||
|
||||
hsa_status_t err = hsa_shut_down();
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
const char* errstr = nullptr;
|
||||
hsa_status_string(err, &errstr);
|
||||
// cannot throw an error here. print instead
|
||||
RDC_LOG(RDC_ERROR, "hsa error code: " + std::to_string(err) + " " + errstr);
|
||||
for (int i = 0; i < dev_count; ++i) {
|
||||
hsa_errno = rocprofiler_close(contexts[i]);
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
hsa_errno = hsa_shut_down();
|
||||
assert(hsa_errno == HSA_STATUS_SUCCESS);
|
||||
hsa_errno = hsa_shut_down();
|
||||
assert(hsa_errno == HSA_STATUS_ERROR_NOT_INITIALIZED);
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) {
|
||||
if (sessions.empty()) {
|
||||
return RDC_ST_NOT_FOUND;
|
||||
}
|
||||
|
||||
if (value == nullptr) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
|
||||
rocmtools_device_profile_metric_t counter;
|
||||
session_info_t session = sessions.at(gpu_field);
|
||||
const rocmtools_status_t status = rocmtools_device_profiling_session_poll(session.id, &counter);
|
||||
session.stop_time = std::chrono::high_resolution_clock::now();
|
||||
if (status != ROCMTOOLS_STATUS_SUCCESS) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
return Rocp2RdcError(status);
|
||||
}
|
||||
const auto elapsed =
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(session.stop_time - session.start_time)
|
||||
.count();
|
||||
// some metrics are derived from others and depend on time passed
|
||||
switch (gpu_field.second) {
|
||||
case RDC_FI_PROF_GFLOPS_16:
|
||||
case RDC_FI_PROF_GFLOPS_32:
|
||||
case RDC_FI_PROF_GFLOPS_64:
|
||||
case RDC_FI_PROF_MEMR_BW_KBPNS:
|
||||
case RDC_FI_PROF_MEMW_BW_KBPNS:
|
||||
*value = counter.value.value / elapsed;
|
||||
break;
|
||||
default:
|
||||
*value = counter.value.value;
|
||||
run_profiler("TA_BUSY_avr", queues);
|
||||
// read_features(contexts[gpu_field.first], features_count);
|
||||
*value = metrics[counter_map_k.at(gpu_field.second)];
|
||||
break;
|
||||
}
|
||||
return Rocp2RdcError(status);
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocpBase::create_session(pair_gpu_field_t gpu_field) {
|
||||
if (sessions.count(gpu_field) != 0) {
|
||||
RDC_LOG(RDC_DEBUG, "Session for field (" << gpu_field.second << ") on GPU [" << gpu_field.first
|
||||
<< "] already exists!");
|
||||
return RDC_ST_ALREADY_EXIST;
|
||||
}
|
||||
|
||||
session_info_t session = {};
|
||||
|
||||
std::vector<const char*> rocmtools_fields = {counter_map_k.at(gpu_field.second)};
|
||||
// create session
|
||||
rocmtools_status_t status = rocmtools_device_profiling_session_create(
|
||||
rocmtools_fields.data(), rocmtools_fields.size(), &session.id, 0, gpu_field.first);
|
||||
|
||||
if (status != ROCMTOOLS_STATUS_SUCCESS) {
|
||||
return Rocp2RdcError(status);
|
||||
}
|
||||
|
||||
// add start time
|
||||
session.start_time = std::chrono::high_resolution_clock::now();
|
||||
sessions.emplace(gpu_field, session);
|
||||
|
||||
// start session
|
||||
status = rocmtools_device_profiling_session_start(session.id);
|
||||
|
||||
return Rocp2RdcError(status);
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocpBase::destroy_session(pair_gpu_field_t gpu_field) {
|
||||
if (sessions.empty()) {
|
||||
RDC_LOG(RDC_DEBUG, "Cannot destroy empty session...");
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
// no session with field
|
||||
if (sessions.count(gpu_field) == 0) {
|
||||
RDC_LOG(RDC_DEBUG, "Cannot destroy session with field (" << gpu_field.second << ") on GPU ["
|
||||
<< gpu_field.first
|
||||
<< "] because it doesn't exist...");
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
const rocmtools_session_id_t session_id = sessions.at(gpu_field).id;
|
||||
const rocmtools_status_t status = rocmtools_device_profiling_session_destroy(session_id);
|
||||
if (status == ROCMTOOLS_STATUS_SUCCESS) {
|
||||
const auto num_of_destroyed_sessions = sessions.erase(gpu_field);
|
||||
RDC_LOG(RDC_DEBUG, "destroyed (" << num_of_destroyed_sessions << ") sessions");
|
||||
}
|
||||
return Rocp2RdcError(status);
|
||||
}
|
||||
|
||||
rdc_status_t RdcRocpBase::Rocp2RdcError(rocmtools_status_t rocm_status) {
|
||||
rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t rocm_status) {
|
||||
switch (rocm_status) {
|
||||
case ROCMTOOLS_STATUS_SUCCESS:
|
||||
case HSA_STATUS_SUCCESS:
|
||||
return RDC_ST_OK;
|
||||
case ROCMTOOLS_STATUS_ERROR_HAS_ACTIVE_SESSION:
|
||||
return RDC_ST_ALREADY_EXIST;
|
||||
case ROCMTOOLS_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH:
|
||||
case ROCMTOOLS_STATUS_ERROR_SESSION_MISSING_FILTER:
|
||||
case ROCMTOOLS_STATUS_ERROR_SESSION_NOT_FOUND:
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
default:
|
||||
return RDC_ST_UNKNOWN_ERROR;
|
||||
}
|
||||
|
||||
@@ -36,6 +36,8 @@ THE SOFTWARE.
|
||||
|
||||
amd::rdc::RdcRocpBase rocp;
|
||||
|
||||
rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; }
|
||||
|
||||
// get supported field ids
|
||||
// TODO: Query fields with rocprofiler
|
||||
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) {
|
||||
@@ -54,7 +56,7 @@ rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint
|
||||
}
|
||||
|
||||
// Fetch
|
||||
rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count,
|
||||
rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint32_t fields_count,
|
||||
rdc_field_value_f callback, void* user_data) {
|
||||
//
|
||||
// Bulk fetch fields
|
||||
@@ -69,7 +71,8 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fi
|
||||
rdc_gpu_field_value_t values[BULK_FIELDS_MAX];
|
||||
uint32_t bulk_count = 0;
|
||||
rdc_status_t status = RDC_ST_UNKNOWN_ERROR;
|
||||
double value = 0;
|
||||
double data;
|
||||
|
||||
for (uint32_t i = 0; i < fields_count; i++) {
|
||||
if (bulk_count >= BULK_FIELDS_MAX) {
|
||||
status = callback(values, bulk_count, user_data);
|
||||
@@ -80,14 +83,13 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fi
|
||||
bulk_count = 0;
|
||||
}
|
||||
|
||||
status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &value);
|
||||
|
||||
status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &data);
|
||||
// get value
|
||||
values[bulk_count].gpu_index = fields[i].gpu_index;
|
||||
values[bulk_count].field_value.type = DOUBLE;
|
||||
values[bulk_count].field_value.status = status;
|
||||
values[bulk_count].field_value.ts = curTime;
|
||||
values[bulk_count].field_value.value.dbl = value;
|
||||
values[bulk_count].field_value.value.dbl = data;
|
||||
values[bulk_count].field_value.field_id = fields[i].field_id;
|
||||
bulk_count++;
|
||||
}
|
||||
@@ -106,8 +108,7 @@ rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields
|
||||
rdc_status_t status = RDC_ST_OK;
|
||||
for (uint32_t i = 0; i < fields_count; i++) {
|
||||
RDC_LOG(RDC_DEBUG, "WATCH: " << fields[i].field_id);
|
||||
const rdc_status_t temp_status =
|
||||
rocp.create_session(std::make_pair(fields[i].gpu_index, fields[i].field_id));
|
||||
const rdc_status_t temp_status = RDC_ST_OK;
|
||||
if (temp_status != RDC_ST_OK) {
|
||||
status = temp_status;
|
||||
}
|
||||
@@ -119,8 +120,7 @@ rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields, uint32_t fiel
|
||||
rdc_status_t status = RDC_ST_OK;
|
||||
for (uint32_t i = 0; i < fields_count; i++) {
|
||||
RDC_LOG(RDC_DEBUG, "UNWATCH: " << fields[i].field_id);
|
||||
const rdc_status_t temp_status =
|
||||
rocp.destroy_session(std::make_pair(fields[i].gpu_index, fields[i].field_id));
|
||||
const rdc_status_t temp_status = RDC_ST_OK;
|
||||
// return last non-ok status
|
||||
if (temp_status != RDC_ST_OK) {
|
||||
status = temp_status;
|
||||
|
||||
@@ -29,7 +29,7 @@ set(RDC_ROCR_LIB_INC_LIST
|
||||
"${RDC_LIB_INC_DIR}/RdcDiagnosticLibInterface.h"
|
||||
"${RDC_LIB_INC_DIR}/rdc_common.h")
|
||||
|
||||
if(BUILD_ROCRTEST)
|
||||
if(BUILD_RUNTIME)
|
||||
message("Build librdc_rocr.so is enabled, make sure the Rocm run time is installed.")
|
||||
|
||||
message("RDC_ROCR_LIB_INC_LIST=${RDC_ROCR_LIB_INC_LIST}")
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user