diff --git a/CMakeLists.txt b/CMakeLists.txt index a6421400b3..16a901df1c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,13 +48,13 @@ option(BUILD_STANDALONE "Build targets for rdci and rdcd" ON) # which requires the ROCT-Thunk-Interface. option(BUILD_RASLIB "Build targets for raslib" OFF) -# When cmake -DBUILD_ROCRTEST=off, it will not build the librdc_rocr.so +# When cmake -DBUILD_RUNTIME=off, it will not build the librdc_rocr.so # which requires the Rocm run time. -option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON) +option(BUILD_RUNTIME "Build targets for librdc_rocr.so" ON) -# When cmake -DBUILD_ROCPTEST=off, it will not build the librdc_rocp.so +# When cmake -DBUILD_PROFILER=off, it will not build the librdc_rocp.so # which requires the Rocm profiler. -option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" OFF) +option(BUILD_PROFILER "Build targets for librdc_rocp.so" OFF) # When cmake -DBUILD_RVS=off, it will not build the librdc_rvs.so # which requires the RocmValidationSuite diff --git a/README.md b/README.md index 1ae13c39a9..5e52d25b8f 100644 --- a/README.md +++ b/README.md @@ -83,9 +83,9 @@ If only the RDC libraries are needed (i.e. only "embedded mode" is required), th ## Building RDC library without ROCM Run time (optional) -The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. To build in this way, -DBUILD_ROCRTEST=off should be passed on the the cmake command line: +The user can choose to not build RDC diagnostic ROCM Run time. This will eliminate the need for ROCM Run time. 
To build in this way, -DBUILD_RUNTIME=off should be passed on the cmake command line: - cmake -B build -DBUILD_ROCRTEST=off + cmake -B build -DBUILD_RUNTIME=off ## Update System Library Path diff --git a/cmake_modules/Findrocmtools.cmake b/cmake_modules/Findrocprofiler.cmake similarity index 92% rename from cmake_modules/Findrocmtools.cmake rename to cmake_modules/Findrocprofiler.cmake index 0a1109f411..4b794b440a 100644 --- a/cmake_modules/Findrocmtools.cmake +++ b/cmake_modules/Findrocprofiler.cmake @@ -1,7 +1,7 @@ -# This module provides a rocmtools::rocmtools package +# This module provides a rocprofiler::rocprofiler package # You can specify the ROCM directory by setting ROCM_DIR -set(NAME rocmtools) +set(NAME rocprofiler) if(NOT DEFINED ROCM_DIR) set(ROCM_DIR "/opt/rocm") diff --git a/common/rdc_field.data b/common/rdc_field.data index 6d30de11d4..2e584b3fba 100644 --- a/common/rdc_field.data +++ b/common/rdc_field.data @@ -98,6 +98,13 @@ FLD_DESC_ENT(RDC_FI_XGMI_7_WRITE_KB, "XGMI7 accumulated data write size (KB) +// ROCProfiler fields +// This doesn't map to rocprofiler counters directly +// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h +// See metrics.xml in rocprofiler +FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL, "", "PROF_GPU_UTIL", false) +FLD_DESC_ENT(RDC_FI_PROF_TA_BUSY_AVR, "", "PROF_TA_BUSY_AVR", false) + // Events FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false) FLD_DESC_ENT(RDC_EVNT_XGMI_0_REQ_TX, "Outgoing requests to neighbor 0", "XGMI_REQ_0", false) diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h index c2cb224b67..74a14009e3 100644 --- a/include/rdc/rdc.h +++ b/include/rdc/rdc.h @@ -247,6 +247,12 @@ typedef enum { RDC_FI_XGMI_6_WRITE_KB, //!< XGMI_6 accumulated data write size (KB) RDC_FI_XGMI_7_WRITE_KB, //!< XGMI_7 accumulated data write size (KB) + /** + * @brief ROC-profiler related fields + */ + RDC_FI_PROF_GPU_UTIL = 800, //!< + RDC_FI_PROF_TA_BUSY_AVR, //!< + /* * @brief Raw 
XGMI counter events */ diff --git a/include/rdc_lib/impl/RdcEmbeddedHandler.h b/include/rdc_lib/impl/RdcEmbeddedHandler.h index c7af1f73f6..5c68ec9654 100644 --- a/include/rdc_lib/impl/RdcEmbeddedHandler.h +++ b/include/rdc_lib/impl/RdcEmbeddedHandler.h @@ -36,7 +36,7 @@ THE SOFTWARE. namespace amd { namespace rdc { -class RdcEmbeddedHandler : public RdcHandler { +class RdcEmbeddedHandler final : public RdcHandler { public: // Job API rdc_status_t rdc_job_start_stats(rdc_gpu_group_t groupId, const char job_id[64], @@ -91,7 +91,7 @@ class RdcEmbeddedHandler : public RdcHandler { rdc_status_t rdc_field_update_all(uint32_t wait_for_update) override; explicit RdcEmbeddedHandler(rdc_operation_mode_t op_mode); - ~RdcEmbeddedHandler(); + ~RdcEmbeddedHandler() final; private: rdc_status_t get_gpu_gauges(rdc_gpu_gauges_t* gpu_gauges); diff --git a/include/rdc_lib/impl/RdcMetricFetcherImpl.h b/include/rdc_lib/impl/RdcMetricFetcherImpl.h index 935ac788f7..ac8e6ff73e 100644 --- a/include/rdc_lib/impl/RdcMetricFetcherImpl.h +++ b/include/rdc_lib/impl/RdcMetricFetcherImpl.h @@ -67,7 +67,7 @@ struct MetricTask { std::function task; }; -class RdcMetricFetcherImpl : public RdcMetricFetcher { +class RdcMetricFetcherImpl final : public RdcMetricFetcher { public: rdc_status_t fetch_smi_field(uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value) override; @@ -75,7 +75,7 @@ class RdcMetricFetcherImpl : public RdcMetricFetcher { rdc_gpu_field_t* fields, uint32_t fields_count, std::vector& results) override; // NOLINT RdcMetricFetcherImpl(); - ~RdcMetricFetcherImpl(); + ~RdcMetricFetcherImpl() final; rdc_status_t acquire_smi_handle(RdcFieldKey fk) override; rdc_status_t delete_smi_handle(RdcFieldKey fk) override; diff --git a/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h b/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h index ffc54e9f35..421959f208 100644 --- a/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h +++ b/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h @@ -31,12 +31,13 @@ THE 
SOFTWARE. namespace amd { namespace rdc { -class RdcMetricsUpdaterImpl : public RdcMetricsUpdater { +class RdcMetricsUpdaterImpl final : public RdcMetricsUpdater { public: void start() override; void stop() override; explicit RdcMetricsUpdaterImpl(const RdcWatchTablePtr& watch_table, const uint32_t check_frequency); + ~RdcMetricsUpdaterImpl() = default; private: RdcWatchTablePtr watch_table_; diff --git a/include/rdc_lib/impl/RdcRocpLib.h b/include/rdc_lib/impl/RdcRocpLib.h index e82d20a35d..95897ce2cf 100644 --- a/include/rdc_lib/impl/RdcRocpLib.h +++ b/include/rdc_lib/impl/RdcRocpLib.h @@ -39,43 +39,32 @@ class RdcRocpLib : public RdcTelemetry { // get support field ids rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) override; - // Fetch rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count, rdc_field_value_f callback, void* user_data) override; - rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields_count) override; - rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields, uint32_t fields_count) override; - RdcRocpLib(); - ~RdcRocpLib(); private: RdcLibraryLoader lib_loader_; - rdc_status_t (*telemetry_fields_query_)(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count); - rdc_status_t (*telemetry_fields_value_get_)(rdc_gpu_field_t* fields, uint32_t fields_count, rdc_field_value_f callback, void* user_data); - rdc_status_t (*telemetry_fields_watch_)(rdc_gpu_field_t* fields, uint32_t fields_count); - rdc_status_t (*telemetry_fields_unwatch_)(rdc_gpu_field_t* fields, uint32_t fields_count); - /** * @brief Extract current ROCM_PATH from library or the environment */ std::string get_rocm_path(); - /** * @brief Set ROCMTOOLS_METRICS_PATH environment variable needed by - * librocmtools + * librocprofiler */ - rdc_status_t set_rocmtools_path(); + rdc_status_t set_rocprofiler_path(); }; using RdcRocpLibPtr = std::shared_ptr; diff --git 
a/include/rdc_modules/rdc_rocp/RdcRocpBase.h b/include/rdc_modules/rdc_rocp/RdcRocpBase.h index 6b1f47ac0f..c6a14f39de 100644 --- a/include/rdc_modules/rdc_rocp/RdcRocpBase.h +++ b/include/rdc_modules/rdc_rocp/RdcRocpBase.h @@ -22,15 +22,17 @@ THE SOFTWARE. #ifndef RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_ #define RDC_MODULES_RDC_ROCP_RDCROCPBASE_H_ -#include +#include #include #include #include +#include #include #include #include #include +#include #include "rdc/rdc.h" @@ -43,33 +45,21 @@ namespace rdc { * See metrics.xml in rocmtools for more info. * RDC_CALC fields are calculated over time by RDC. */ -static const std::unordered_map counter_map_k = { - {RDC_FI_PROF_ELAPSED_CYCLES, "GRBM_COUNT"}, - {RDC_FI_PROF_ACTIVE_WAVES, "SQ_WAVES"}, - {RDC_FI_PROF_ACTIVE_CYCLES, "SQ_BUSY_CU_CYCLES"}, - {RDC_FI_PROF_CU_OCCUPANCY, "CU_OCCUPANCY"}, - {RDC_FI_PROF_CU_UTILIZATION, "CU_UTILIZATION"}, - {RDC_FI_PROF_FETCH_SIZE, "FETCH_SIZE"}, - {RDC_FI_PROF_WRITE_SIZE, "WRITE_SIZE"}, - {RDC_FI_PROF_FLOPS_16, "TOTAL_16_OPS"}, - {RDC_FI_PROF_FLOPS_32, "TOTAL_32_OPS"}, - {RDC_FI_PROF_FLOPS_64, "TOTAL_64_OPS"}, - // fields below require special handling - {RDC_FI_PROF_GFLOPS_16, "TOTAL_16_OPS"}, - {RDC_FI_PROF_GFLOPS_32, "TOTAL_32_OPS"}, - {RDC_FI_PROF_GFLOPS_64, "TOTAL_64_OPS"}, - {RDC_FI_PROF_MEMR_BW_KBPNS, "FETCH_SIZE"}, - {RDC_FI_PROF_MEMW_BW_KBPNS, "WRITE_SIZE"}, +static const std::map counter_map_k = { + {RDC_FI_PROF_GPU_UTIL, "GPU_UTIL"}, + {RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"}, }; +typedef struct { + hsa_agent_t* agents; + unsigned count; + unsigned capacity; +} hsa_agent_arr_t; + /// Common interface for RocP tests and samples class RdcRocpBase { + static const int dev_count = 1; typedef std::pair pair_gpu_field_t; - typedef struct session_info_t { - rocmtools_session_id_t id{}; - std::chrono::time_point start_time; - std::chrono::time_point stop_time; - } session_info_t; public: RdcRocpBase(); @@ -90,43 +80,21 @@ class RdcRocpBase { */ rdc_status_t rocp_lookup(pair_gpu_field_t 
gpu_field, double* value); - /** - * @brief Destroy ROCmTools session responsible for monitoring a given - * field - * - * @details While rocmtools supports multiple fields per ID - it has a - * limit to how many counters it can query internally. - * To avoid concerning ourselves with said limit, we limit each session to - * 1 field. - * In the future this can be optimized to allow for multiple fields per - * session. - * - * @param[in] field A field to start monitoring - * - * @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed - * successfully. - */ - rdc_status_t create_session(pair_gpu_field_t gpu_field); - - /** - * @brief Destroy ROCmTools session responsible for monitoring a given - * field - * - * @param[in] field A field to stop monitoring - * - * @retval ::ROCMTOOLS_STATUS_SUCCESS The function has been executed - * successfully. - */ - rdc_status_t destroy_session(pair_gpu_field_t gpu_field); - protected: private: - std::map sessions; + rocprofiler_t* contexts[dev_count] = {nullptr}; + static const int features_count = 1; + std::map metrics; + rocprofiler_feature_t features[dev_count][features_count]; + void read_features(rocprofiler_t* context, const unsigned feature_count); + int run_profiler(const char* feature_name, hsa_queue_t** queues); + hsa_queue_t* queues[dev_count] = {nullptr}; + hsa_agent_arr_t agent_arr; /** * @brief Convert from rocmtools status into RDC status */ - rdc_status_t Rocp2RdcError(rocmtools_status_t rocm_status); + rdc_status_t Rocp2RdcError(hsa_status_t rocm_status); }; } // namespace rdc diff --git a/rdc_libs/rdc/src/RdcModuleMgrImpl.cc b/rdc_libs/rdc/src/RdcModuleMgrImpl.cc index dac2048e16..6a1cf84c7b 100644 --- a/rdc_libs/rdc/src/RdcModuleMgrImpl.cc +++ b/rdc_libs/rdc/src/RdcModuleMgrImpl.cc @@ -31,6 +31,7 @@ THE SOFTWARE. 
#include "rdc_lib/impl/RdcDiagnosticModule.h" #include "rdc_lib/impl/RdcRVSLib.h" #include "rdc_lib/impl/RdcRasLib.h" +#include "rdc_lib/impl/RdcRocpLib.h" #include "rdc_lib/impl/RdcRocrLib.h" #include "rdc_lib/impl/RdcSmiLib.h" #include "rdc_lib/impl/RdcTelemetryModule.h" @@ -41,7 +42,7 @@ namespace rdc { // pass shared_ptr instead of creating it template rdc_status_t RdcModuleMgrImpl::insert_modules(std::shared_ptr ptr) { - static_assert(std::is_base_of_v || std::is_base_of_v); + static_assert(std::is_base_of_v || std::is_base_of_v); RDC_LOG(RDC_DEBUG, "Inserting module: " << typeid(T).name()); // same module can service multiple subsystems // e.g. Diagnostics and Telemetry @@ -57,7 +58,7 @@ rdc_status_t RdcModuleMgrImpl::insert_modules(std::shared_ptr ptr) { // base case template rdc_status_t RdcModuleMgrImpl::insert_modules() { - static_assert(std::is_base_of_v || std::is_base_of_v); + static_assert(std::is_base_of_v || std::is_base_of_v); try { auto ptr = std::make_shared(); return insert_modules(ptr); @@ -88,7 +89,7 @@ RdcModuleMgrImpl::RdcModuleMgrImpl(const RdcMetricFetcherPtr& fetcher) : fetcher } // all other modules get initialized by insert_modules - insert_modules(); + insert_modules(); } RdcTelemetryPtr RdcModuleMgrImpl::get_telemetry_module() { diff --git a/rdc_libs/rdc/src/RdcRocpLib.cc b/rdc_libs/rdc/src/RdcRocpLib.cc index 4d48dbd6ce..347e587101 100644 --- a/rdc_libs/rdc/src/RdcRocpLib.cc +++ b/rdc_libs/rdc/src/RdcRocpLib.cc @@ -38,16 +38,16 @@ RdcRocpLib::RdcRocpLib() telemetry_fields_value_get_(nullptr), telemetry_fields_watch_(nullptr), telemetry_fields_unwatch_(nullptr) { - rdc_status_t status = lib_loader_.load("librdc_rocp.so"); + rdc_status_t status = set_rocprofiler_path(); if (status != RDC_ST_OK) { RDC_LOG(RDC_ERROR, "Rocp related function will not work."); + throw RdcException(RDC_ST_FAIL_LOAD_MODULE, "rocprofiler path could not be set"); return; } - status = set_rocmtools_path(); + status = lib_loader_.load("librdc_rocp.so"); if 
(status != RDC_ST_OK) { RDC_LOG(RDC_ERROR, "Rocp related function will not work."); - throw RdcException(RDC_ST_FAIL_LOAD_MODULE, "rocmtools path could not be set"); return; } @@ -142,7 +142,7 @@ std::string RdcRocpLib::get_rocm_path() { std::string line; while (getline(file, line)) { - size_t index_end = line.find("librocmtools.so"); + size_t index_end = line.find("librocprofiler64.so"); size_t index_start = index_end; if (index_end == std::string::npos) { // no library on this line @@ -162,32 +162,38 @@ std::string RdcRocpLib::get_rocm_path() { return rocm_path; } -rdc_status_t RdcRocpLib::set_rocmtools_path() { - // librocmtools requires ROCMTOOLS_METRICS_PATH to be set - std::string rocmtools_metrics_path = - get_rocm_path() + "/libexec/rocmtools/counters/derived_counters.xml"; +rdc_status_t RdcRocpLib::set_rocprofiler_path() { + // librocprofiler64 requires ROCPROFILER_METRICS_PATH to be set + std::string rocprofiler_metrics_path = + get_rocm_path() + "/libexec/rocprofiler/counters/derived_counters.xml"; // set rocm prefix - int result = setenv("ROCMTOOLS_METRICS_PATH", rocmtools_metrics_path.c_str(), 0); + int result = setenv("ROCPROFILER_METRICS_PATH", rocprofiler_metrics_path.c_str(), 0); if (result != 0) { - RDC_LOG(RDC_ERROR, "setenv ROCMTOOLS_METRICS_PATH failed! " << result); + RDC_LOG(RDC_ERROR, "setenv ROCPROFILER_METRICS_PATH failed! 
" << result); return RDC_ST_PERM_ERROR; } // check that env exists - const char* rocmtools_metrics_env = getenv("ROCMTOOLS_METRICS_PATH"); - if (rocmtools_metrics_env == nullptr) { - RDC_LOG(RDC_ERROR, "ROCMTOOLS_METRICS_PATH is not set!"); + const char* rocprofiler_metrics_env = getenv("ROCPROFILER_METRICS_PATH"); + if (rocprofiler_metrics_env == nullptr) { + RDC_LOG(RDC_ERROR, "ROCPROFILER_METRICS_PATH is not set!"); return RDC_ST_NO_DATA; } // check that file can be accessed - std::ifstream test_file(rocmtools_metrics_env); + std::ifstream test_file(rocprofiler_metrics_env); if (!test_file.good()) { - RDC_LOG(RDC_ERROR, "failed to open ROCMTOOLS_METRICS_PATH: " << rocmtools_metrics_env); + RDC_LOG(RDC_ERROR, "failed to open ROCPROFILER_METRICS_PATH: " << rocprofiler_metrics_env); return RDC_ST_FILE_ERROR; } + result = setenv("ROCP_METRICS", rocprofiler_metrics_path.c_str(), 0); + if (result != 0) { + RDC_LOG(RDC_ERROR, "setenv ROCP_METRICS failed! " << result); + return RDC_ST_PERM_ERROR; + } + return RDC_ST_OK; } diff --git a/rdc_libs/rdc_modules/rdc_rocp/CMakeLists.txt b/rdc_libs/rdc_modules/rdc_rocp/CMakeLists.txt index d2d03debd1..53366c95fd 100644 --- a/rdc_libs/rdc_modules/rdc_rocp/CMakeLists.txt +++ b/rdc_libs/rdc_modules/rdc_rocp/CMakeLists.txt @@ -17,19 +17,22 @@ set(RDC_ROCP_LIB_INC_LIST "${RDC_LIB_INC_DIR}/RdcLogger.h" "${INC_DIR}/RdcRocpBase.h") -if(BUILD_ROCPTEST) +if(BUILD_PROFILER) message("Build librdc_rocp.so is enabled, make sure ROCmTools is installed.") message("RDC_ROCP_LIB_INC_LIST=${RDC_ROCP_LIB_INC_LIST}") - set(ROCMTOOLS_LIB rocmtools::rocmtools) - # below provides rocmtools::rocmtools package - include(Findrocmtools) + set(ROCPROFILER_LIB rocprofiler::rocprofiler) + # below provides rocprofiler::rocprofiler package + include(Findrocprofiler) - set(HSA_LIB "hsa-runtime64") + find_package(hsa-runtime64 + NAMES hsa-runtime64 + HINTS ${ROCM_DIR}/lib/cmake + CONFIGURE REQUIRED) set(RDC_LIB_MODULES ${RDC_LIB_MODULES} ${RDC_ROCP_LIB} 
PARENT_SCOPE) add_library(${RDC_ROCP_LIB} SHARED ${RDC_ROCP_LIB_SRC_LIST} ${RDC_ROCP_LIB_INC_LIST}) - target_link_libraries(${RDC_ROCP_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} ${HSA_LIB} ${ROCMTOOLS_LIB} pthread dl) + target_link_libraries(${RDC_ROCP_LIB} ${RDC_LIB} ${BOOTSTRAP_LIB} hsa-runtime64::hsa-runtime64 ${ROCPROFILER_LIB} pthread dl) target_include_directories(${RDC_ROCP_LIB} PRIVATE "${PROJECT_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}/include" diff --git a/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc b/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc index 4cdee36d81..1efb594416 100644 --- a/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc +++ b/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc @@ -22,14 +22,13 @@ THE SOFTWARE. #include "rdc_modules/rdc_rocp/RdcRocpBase.h" -#include +#include +#include #include -#include #include -#include -#include "hsa.h" +// #include "hsa.h" #include "rdc/rdc.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/rdc_common.h" @@ -37,7 +36,162 @@ THE SOFTWARE. namespace amd { namespace rdc { +static hsa_status_t get_agent_handle_cb(hsa_agent_t agent, void* agent_arr) { + hsa_device_type_t type; + hsa_agent_arr_t* agent_arr_ = (hsa_agent_arr_t*)agent_arr; + + hsa_status_t hsa_errno = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + if (hsa_errno != HSA_STATUS_SUCCESS) { + return hsa_errno; + } + + if (type == HSA_DEVICE_TYPE_GPU) { + if (agent_arr_->count >= agent_arr_->capacity) { + agent_arr_->capacity *= 2; + agent_arr_->agents = + (hsa_agent_t*)realloc(agent_arr_->agents, agent_arr_->capacity * sizeof(hsa_agent_t)); + // realloc might set agents to nullptr upon failure + assert(agent_arr_->agents != nullptr); + } + agent_arr_->agents[agent_arr_->count] = agent; + ++agent_arr_->count; + } + + return HSA_STATUS_SUCCESS; +} + +void RdcRocpBase::read_features(rocprofiler_t* context, const unsigned feature_count) { + hsa_status_t hsa_errno = rocprofiler_read(context, 0); + assert(hsa_errno == HSA_STATUS_SUCCESS); + hsa_errno = 
rocprofiler_get_data(context, 0); + assert(hsa_errno == HSA_STATUS_SUCCESS); + hsa_errno = rocprofiler_get_metrics(context); + assert(hsa_errno == HSA_STATUS_SUCCESS); + for (auto i = 0; i < feature_count; i++) { + switch (features[0][i].data.kind) { + case ROCPROFILER_DATA_KIND_DOUBLE: + metrics[features[0][i].name] = features[0][i].data.result_double; + break; + default: + RDC_LOG(RDC_ERROR, "ERROR: Unexpected feature kind: " << features[0][i].data.kind); + } + } +} + +static int get_agents(hsa_agent_arr_t* agent_arr) { + int errcode = 0; + hsa_status_t hsa_errno = HSA_STATUS_SUCCESS; + + agent_arr->capacity = 1; + agent_arr->count = 0; + agent_arr->agents = (hsa_agent_t*)calloc(agent_arr->capacity, sizeof(hsa_agent_t)); + assert(agent_arr->agents); + + hsa_errno = hsa_iterate_agents(get_agent_handle_cb, agent_arr); + if (hsa_errno != HSA_STATUS_SUCCESS) { + errcode = -1; + + agent_arr->capacity = 0; + agent_arr->count = 0; + free(agent_arr->agents); + } + + return errcode; +} + +bool createHsaQueue(hsa_queue_t** queue, hsa_agent_t gpu_agent) { + // create a single-producer queue + // TODO: check if API args are correct, especially UINT32_MAX + hsa_status_t status; + status = hsa_queue_create(gpu_agent, 64, HSA_QUEUE_TYPE_SINGLE, NULL, NULL, UINT32_MAX, + UINT32_MAX, queue); + if (status != HSA_STATUS_SUCCESS) fprintf(stdout, "Queue creation failed"); + + // TODO: warning: is it really required!! ?? + status = hsa_amd_queue_set_priority(*queue, HSA_AMD_QUEUE_PRIORITY_HIGH); + if (status != HSA_STATUS_SUCCESS) fprintf(stdout, "HSA Queue Priority Set Failed"); + + return (status == HSA_STATUS_SUCCESS); +} + +int RdcRocpBase::run_profiler(const char* feature_name, hsa_queue_t** queues) { + const char* events[features_count] = {feature_name}; + + // initialize hsa. 
hsa_init() will also load the profiler libs under the hood + hsa_status_t hsa_errno = HSA_STATUS_SUCCESS; + + for (int i = 0; i < dev_count; ++i) { + for (int j = 0; j < features_count; ++j) { + features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC; + features[i][j].name = events[j]; + } + } + + rocprofiler_t* contexts[dev_count] = {0}; + for (int i = 0; i < dev_count; ++i) { + rocprofiler_properties_t properties = { + queues[i], + 64, + NULL, + NULL, + }; + int mode = (ROCPROFILER_MODE_STANDALONE | ROCPROFILER_MODE_SINGLEGROUP); + hsa_errno = rocprofiler_open(agent_arr.agents[i], features[i], features_count, &contexts[i], + mode, &properties); + const char* error_string; + rocprofiler_error_string(&error_string); + if (error_string != NULL) { + fprintf(stdout, "%s", error_string); + fflush(stdout); + } + assert(hsa_errno == HSA_STATUS_SUCCESS); + } + + for (int i = 0; i < dev_count; ++i) { + hsa_errno = rocprofiler_start(contexts[i], 0); + assert(hsa_errno == HSA_STATUS_SUCCESS); + } + + // this is the duration for which the counter increments from zero. 
+ usleep(10000); + + for (int i = 0; i < dev_count; ++i) { + hsa_errno = rocprofiler_stop(contexts[i], 0); + assert(hsa_errno == HSA_STATUS_SUCCESS); + } + + for (int i = 0; i < dev_count; ++i) { + // printf("Iteration %d\n", loopcount++); + // fprintf(stdout, "------ Collecting Device[%d] -------\n", i); + read_features(contexts[i], features_count); + } + + usleep(100); + + for (int i = 0; i < dev_count; ++i) { + hsa_errno = rocprofiler_close(contexts[i]); + assert(hsa_errno == HSA_STATUS_SUCCESS); + } + + return 0; +} + RdcRocpBase::RdcRocpBase() { + // populate monitored fields + const std::map counter_map_k = { + {RDC_FI_PROF_TA_BUSY_AVR, "TA_BUSY_avr"}, + }; + std::cout << "Size of counter_map_k: " << counter_map_k.size() << "\n"; + for (auto& [k, v] : counter_map_k) { + metrics[v] = 0.0; + } + assert(metrics.size() == counter_map_k.size()); + + printf("Metric size %d\n", (int)metrics.size()); + for (auto& metric : metrics) { + printf("Metric: %s\n", metric.first); + } + hsa_status_t err = hsa_init(); if (err != HSA_STATUS_SUCCESS) { const char* errstr = nullptr; @@ -45,124 +199,72 @@ RdcRocpBase::RdcRocpBase() { throw std::runtime_error("hsa error code: " + std::to_string(err) + " " + errstr); } - auto status = rocmtools_initialize(); - RDC_LOG(RDC_INFO, "rocmtools_initialize status: " << status); + // populate list of agents + int errcode = get_agents(&agent_arr); + if (errcode != 0) { + return; + } + printf("number of devices: %u\n", agent_arr.count); + printf("devices being profiled: %u\n", dev_count); + + for (int i = 0; i < dev_count; ++i) { + int j = 0; + for (auto& metric : metrics) { + features[i][j].kind = (rocprofiler_feature_kind_t)ROCPROFILER_FEATURE_KIND_METRIC; + features[i][j].name = metric.first; + printf("Metric[%d]: %s\n", j, features[i][j].name); + j++; + } + } + + for (int i = 0; i < dev_count; ++i) { + if (!createHsaQueue(&queues[i], agent_arr.agents[i])) { + fprintf(stdout, "can't create queues[%d]\n", i); + } + } } 
RdcRocpBase::~RdcRocpBase() { - for (auto& session : sessions) { - const rdc_status_t status = destroy_session(session.first); - assert(status == RDC_ST_OK); + hsa_status_t hsa_errno = HSA_STATUS_SUCCESS; + for (int i = 0; i < dev_count; ++i) { + hsa_errno = rocprofiler_stop(contexts[i], 0); + assert(hsa_errno == HSA_STATUS_SUCCESS); } - sessions.clear(); - auto status = rocmtools_finalize(); - RDC_LOG(RDC_INFO, "rocmtools_finalize status: " << status); - hsa_status_t err = hsa_shut_down(); - if (err != HSA_STATUS_SUCCESS) { - const char* errstr = nullptr; - hsa_status_string(err, &errstr); - // cannot throw an error here. print instead - RDC_LOG(RDC_ERROR, "hsa error code: " + std::to_string(err) + " " + errstr); + for (int i = 0; i < dev_count; ++i) { + hsa_errno = rocprofiler_close(contexts[i]); + assert(hsa_errno == HSA_STATUS_SUCCESS); } + + hsa_errno = hsa_shut_down(); + assert(hsa_errno == HSA_STATUS_SUCCESS); + hsa_errno = hsa_shut_down(); + assert(hsa_errno == HSA_STATUS_ERROR_NOT_INITIALIZED); } rdc_status_t RdcRocpBase::rocp_lookup(pair_gpu_field_t gpu_field, double* value) { - if (sessions.empty()) { - return RDC_ST_NOT_FOUND; - } - if (value == nullptr) { return RDC_ST_BAD_PARAMETER; } - rocmtools_device_profile_metric_t counter; - session_info_t session = sessions.at(gpu_field); - const rocmtools_status_t status = rocmtools_device_profiling_session_poll(session.id, &counter); - session.stop_time = std::chrono::high_resolution_clock::now(); - if (status != ROCMTOOLS_STATUS_SUCCESS) { + hsa_status_t status = HSA_STATUS_SUCCESS; + if (status != HSA_STATUS_SUCCESS) { return Rocp2RdcError(status); } - const auto elapsed = - std::chrono::duration_cast(session.stop_time - session.start_time) - .count(); - // some metrics are derived from others and depend on time passed switch (gpu_field.second) { - case RDC_FI_PROF_GFLOPS_16: - case RDC_FI_PROF_GFLOPS_32: - case RDC_FI_PROF_GFLOPS_64: - case RDC_FI_PROF_MEMR_BW_KBPNS: - case RDC_FI_PROF_MEMW_BW_KBPNS: - 
*value = counter.value.value / elapsed; - break; default: - *value = counter.value.value; + run_profiler("TA_BUSY_avr", queues); + // read_features(contexts[gpu_field.first], features_count); + *value = metrics[counter_map_k.at(gpu_field.second)]; break; } return Rocp2RdcError(status); } -rdc_status_t RdcRocpBase::create_session(pair_gpu_field_t gpu_field) { - if (sessions.count(gpu_field) != 0) { - RDC_LOG(RDC_DEBUG, "Session for field (" << gpu_field.second << ") on GPU [" << gpu_field.first - << "] already exists!"); - return RDC_ST_ALREADY_EXIST; - } - - session_info_t session = {}; - - std::vector rocmtools_fields = {counter_map_k.at(gpu_field.second)}; - // create session - rocmtools_status_t status = rocmtools_device_profiling_session_create( - rocmtools_fields.data(), rocmtools_fields.size(), &session.id, 0, gpu_field.first); - - if (status != ROCMTOOLS_STATUS_SUCCESS) { - return Rocp2RdcError(status); - } - - // add start time - session.start_time = std::chrono::high_resolution_clock::now(); - sessions.emplace(gpu_field, session); - - // start session - status = rocmtools_device_profiling_session_start(session.id); - - return Rocp2RdcError(status); -} - -rdc_status_t RdcRocpBase::destroy_session(pair_gpu_field_t gpu_field) { - if (sessions.empty()) { - RDC_LOG(RDC_DEBUG, "Cannot destroy empty session..."); - return RDC_ST_OK; - } - - // no session with field - if (sessions.count(gpu_field) == 0) { - RDC_LOG(RDC_DEBUG, "Cannot destroy session with field (" << gpu_field.second << ") on GPU [" - << gpu_field.first - << "] because it doesn't exist..."); - return RDC_ST_OK; - } - - const rocmtools_session_id_t session_id = sessions.at(gpu_field).id; - const rocmtools_status_t status = rocmtools_device_profiling_session_destroy(session_id); - if (status == ROCMTOOLS_STATUS_SUCCESS) { - const auto num_of_destroyed_sessions = sessions.erase(gpu_field); - RDC_LOG(RDC_DEBUG, "destroyed (" << num_of_destroyed_sessions << ") sessions"); - } - return 
Rocp2RdcError(status); -} - -rdc_status_t RdcRocpBase::Rocp2RdcError(rocmtools_status_t rocm_status) { +rdc_status_t RdcRocpBase::Rocp2RdcError(hsa_status_t rocm_status) { switch (rocm_status) { - case ROCMTOOLS_STATUS_SUCCESS: + case HSA_STATUS_SUCCESS: return RDC_ST_OK; - case ROCMTOOLS_STATUS_ERROR_HAS_ACTIVE_SESSION: - return RDC_ST_ALREADY_EXIST; - case ROCMTOOLS_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH: - case ROCMTOOLS_STATUS_ERROR_SESSION_MISSING_FILTER: - case ROCMTOOLS_STATUS_ERROR_SESSION_NOT_FOUND: - return RDC_ST_BAD_PARAMETER; default: return RDC_ST_UNKNOWN_ERROR; } diff --git a/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc b/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc index c8441ba7c5..b175a39634 100644 --- a/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc +++ b/rdc_libs/rdc_modules/rdc_rocp/RdcTelemetryLib.cc @@ -36,6 +36,8 @@ THE SOFTWARE. amd::rdc::RdcRocpBase rocp; +rdc_status_t rdc_module_init(uint64_t flags) { return RDC_ST_OK; } + // get supported field ids // TODO: Query fields with rocprofiler rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint32_t* field_count) { @@ -54,7 +56,7 @@ rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS], uint } // Fetch -rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count, +rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, const uint32_t fields_count, rdc_field_value_f callback, void* user_data) { // // Bulk fetch fields @@ -69,7 +71,8 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fi rdc_gpu_field_value_t values[BULK_FIELDS_MAX]; uint32_t bulk_count = 0; rdc_status_t status = RDC_ST_UNKNOWN_ERROR; - double value = 0; + double data; + for (uint32_t i = 0; i < fields_count; i++) { if (bulk_count >= BULK_FIELDS_MAX) { status = callback(values, bulk_count, user_data); @@ -80,14 +83,13 @@ rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t 
fi bulk_count = 0; } - status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &value); - + status = rocp.rocp_lookup(std::make_pair(fields[i].gpu_index, fields[i].field_id), &data); // get value values[bulk_count].gpu_index = fields[i].gpu_index; values[bulk_count].field_value.type = DOUBLE; values[bulk_count].field_value.status = status; values[bulk_count].field_value.ts = curTime; - values[bulk_count].field_value.value.dbl = value; + values[bulk_count].field_value.value.dbl = data; values[bulk_count].field_value.field_id = fields[i].field_id; bulk_count++; } @@ -106,8 +108,7 @@ rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields rdc_status_t status = RDC_ST_OK; for (uint32_t i = 0; i < fields_count; i++) { RDC_LOG(RDC_DEBUG, "WATCH: " << fields[i].field_id); - const rdc_status_t temp_status = - rocp.create_session(std::make_pair(fields[i].gpu_index, fields[i].field_id)); + const rdc_status_t temp_status = RDC_ST_OK; if (temp_status != RDC_ST_OK) { status = temp_status; } @@ -119,8 +120,7 @@ rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields, uint32_t fiel rdc_status_t status = RDC_ST_OK; for (uint32_t i = 0; i < fields_count; i++) { RDC_LOG(RDC_DEBUG, "UNWATCH: " << fields[i].field_id); - const rdc_status_t temp_status = - rocp.destroy_session(std::make_pair(fields[i].gpu_index, fields[i].field_id)); + const rdc_status_t temp_status = RDC_ST_OK; // return last non-ok status if (temp_status != RDC_ST_OK) { status = temp_status; diff --git a/rdc_libs/rdc_modules/rdc_rocr/CMakeLists.txt b/rdc_libs/rdc_modules/rdc_rocr/CMakeLists.txt index d61c87133f..85326c4bfa 100644 --- a/rdc_libs/rdc_modules/rdc_rocr/CMakeLists.txt +++ b/rdc_libs/rdc_modules/rdc_rocr/CMakeLists.txt @@ -29,7 +29,7 @@ set(RDC_ROCR_LIB_INC_LIST "${RDC_LIB_INC_DIR}/RdcDiagnosticLibInterface.h" "${RDC_LIB_INC_DIR}/rdc_common.h") -if(BUILD_ROCRTEST) +if(BUILD_RUNTIME) message("Build librdc_rocr.so is enabled, make sure the Rocm run 
time is installed.") message("RDC_ROCR_LIB_INC_LIST=${RDC_ROCR_LIB_INC_LIST}")