diff --git a/projects/rdc/include/rdc_modules/rdc_rocp/RdcRocpBase.h b/projects/rdc/include/rdc_modules/rdc_rocp/RdcRocpBase.h index 55b63e0e5a..cb36fc71a0 100644 --- a/projects/rdc/include/rdc_modules/rdc_rocp/RdcRocpBase.h +++ b/projects/rdc/include/rdc_modules/rdc_rocp/RdcRocpBase.h @@ -81,11 +81,15 @@ class RdcRocpBase { */ rdc_status_t map_entity_to_profiler(); + void init_rocp_if_not(); + std::vector agents = {}; std::vector> samplers = {}; std::map field_to_metric = {}; std::map entity_to_prof_map = {}; + bool m_is_initialized = false; + // these fields must be divided by time passed std::unordered_set eval_fields = { RDC_FI_PROF_EVAL_MEM_R_BW, RDC_FI_PROF_EVAL_MEM_W_BW, diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc index 4b18147987..658b26b4d4 100644 --- a/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc @@ -51,6 +51,69 @@ THE SOFTWARE. namespace amd { namespace rdc { +static const std::map temp_field_map_k = { + {RDC_FI_PROF_OCCUPANCY_PERCENT, "OccupancyPercent"}, + {RDC_FI_PROF_ACTIVE_CYCLES, "GRBM_GUI_ACTIVE"}, + {RDC_FI_PROF_ACTIVE_WAVES, "SQ_WAVES"}, + {RDC_FI_PROF_ELAPSED_CYCLES, "GRBM_COUNT"}, + {RDC_FI_PROF_TENSOR_ACTIVE_PERCENT, + "MfmaUtil"}, // same as TENSOR_ACTIVE but available for more GPUs + {RDC_FI_PROF_GPU_UTIL_PERCENT, "GPU_UTIL"}, // metric is divided by 100 to get percent + // metrics below are divided by time passed + {RDC_FI_PROF_EVAL_MEM_R_BW, "FETCH_SIZE"}, + {RDC_FI_PROF_EVAL_MEM_W_BW, "WRITE_SIZE"}, + {RDC_FI_PROF_EVAL_FLOPS_16, "TOTAL_16_OPS"}, + {RDC_FI_PROF_EVAL_FLOPS_32, "TOTAL_32_OPS"}, + {RDC_FI_PROF_EVAL_FLOPS_64, "TOTAL_64_OPS"}, + {RDC_FI_PROF_EVAL_FLOPS_16_PERCENT, "RDC_OPS_16_PER_SIMDCYCLE"}, + {RDC_FI_PROF_EVAL_FLOPS_32_PERCENT, "RDC_OPS_32_PER_SIMDCYCLE"}, + {RDC_FI_PROF_EVAL_FLOPS_64_PERCENT, "RDC_OPS_64_PER_SIMDCYCLE"}, + // metrics below are not divided by time passed + {RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "ValuPipeIssueUtil"}, + {RDC_FI_PROF_SM_ACTIVE, "VALUBusy"}, + {RDC_FI_PROF_OCC_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"}, + {RDC_FI_PROF_OCC_ELAPSED, + "GRBM_GUI_ACTIVE"}, // this metric is derived from OCC_PER_ACTIVE_CU and ACTIVE_CYCLES + {RDC_FI_PROF_CPC_CPC_STAT_BUSY, "CPC_CPC_STAT_BUSY"}, + {RDC_FI_PROF_CPC_CPC_STAT_IDLE, "CPC_CPC_STAT_IDLE"}, + {RDC_FI_PROF_CPC_CPC_STAT_STALL, "CPC_CPC_STAT_STALL"}, + {RDC_FI_PROF_CPC_CPC_TCIU_BUSY, "CPC_CPC_TCIU_BUSY"}, + {RDC_FI_PROF_CPC_CPC_TCIU_IDLE, "CPC_CPC_TCIU_IDLE"}, + {RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY, "CPC_CPC_UTCL2IU_BUSY"}, + {RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE, "CPC_CPC_UTCL2IU_IDLE"}, + {RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL, "CPC_CPC_UTCL2IU_STALL"}, + {RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE, "CPC_ME1_BUSY_FOR_PACKET_DECODE"}, + {RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY, "CPC_ME1_DC0_SPI_BUSY"}, + {RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION, "CPC_UTCL1_STALL_ON_TRANSLATION"}, + {RDC_FI_PROF_CPC_ALWAYS_COUNT, "CPC_ALWAYS_COUNT"}, + {RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL, "CPC_ADC_VALID_CHUNK_NOT_AVAIL"}, + {RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE, "CPC_ADC_DISPATCH_ALLOC_DONE"}, + {RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END, "CPC_ADC_VALID_CHUNK_END"}, + {RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL, "CPC_SYNC_FIFO_FULL_LEVEL"}, + {RDC_FI_PROF_CPC_SYNC_FIFO_FULL, "CPC_SYNC_FIFO_FULL"}, + {RDC_FI_PROF_CPC_GD_BUSY, "CPC_GD_BUSY"}, + {RDC_FI_PROF_CPC_TG_SEND, "CPC_TG_SEND"}, + {RDC_FI_PROF_CPC_WALK_NEXT_CHUNK, "CPC_WALK_NEXT_CHUNK"}, + {RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI, "CPC_STALLED_BY_SE0_SPI"}, + {RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI, "CPC_STALLED_BY_SE1_SPI"}, + {RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI, "CPC_STALLED_BY_SE2_SPI"}, + {RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI, "CPC_STALLED_BY_SE3_SPI"}, + {RDC_FI_PROF_CPC_LTE_ALL, "CPC_LTE_ALL"}, + {RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY, "CPC_SYNC_WRREQ_FIFO_BUSY"}, + {RDC_FI_PROF_CPC_CANE_BUSY, "CPC_CANE_BUSY"}, + {RDC_FI_PROF_CPC_CANE_STALL, "CPC_CANE_STALL"}, + {RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION, "CPF_CMP_UTCL1_STALL_ON_TRANSLATION"}, + {RDC_FI_PROF_CPF_CPF_STAT_BUSY, "CPF_CPF_STAT_BUSY"}, + {RDC_FI_PROF_CPF_CPF_STAT_IDLE, "CPF_CPF_STAT_IDLE"}, + {RDC_FI_PROF_CPF_CPF_STAT_STALL, "CPF_CPF_STAT_STALL"}, + {RDC_FI_PROF_CPF_CPF_TCIU_BUSY, "CPF_CPF_TCIU_BUSY"}, + {RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "CPF_CPF_TCIU_IDLE"}, + {RDC_FI_PROF_CPF_CPF_TCIU_STALL, "CPF_CPF_TCIU_STALL"}, + {RDC_FI_PROF_SIMD_UTILIZATION, "SIMD_UTILIZATION"}, + {RDC_FI_PROF_UUID, "SQ_WAVES"}, // dummy value, + {RDC_FI_PROF_KFD_ID, "SQ_WAVES"}, // dummy value, +}; + double RdcRocpBase::run_profiler(uint32_t agent_index, rdc_field_t field) { thread_local std::vector records; @@ -200,70 +263,13 @@ rdc_status_t RdcRocpBase::map_entity_to_profiler() { return RDC_ST_OK; } -RdcRocpBase::RdcRocpBase() { - // all fields - static const std::map temp_field_map_k = { - {RDC_FI_PROF_OCCUPANCY_PERCENT, "OccupancyPercent"}, - {RDC_FI_PROF_ACTIVE_CYCLES, "GRBM_GUI_ACTIVE"}, - {RDC_FI_PROF_ACTIVE_WAVES, "SQ_WAVES"}, - {RDC_FI_PROF_ELAPSED_CYCLES, "GRBM_COUNT"}, - {RDC_FI_PROF_TENSOR_ACTIVE_PERCENT, - "MfmaUtil"}, // same as TENSOR_ACTIVE but available for more GPUs - {RDC_FI_PROF_GPU_UTIL_PERCENT, "GPU_UTIL"}, // metric is divided by 100 to get percent - // metrics below are divided by time passed - {RDC_FI_PROF_EVAL_MEM_R_BW, "FETCH_SIZE"}, - {RDC_FI_PROF_EVAL_MEM_W_BW, "WRITE_SIZE"}, - {RDC_FI_PROF_EVAL_FLOPS_16, "TOTAL_16_OPS"}, - {RDC_FI_PROF_EVAL_FLOPS_32, "TOTAL_32_OPS"}, - {RDC_FI_PROF_EVAL_FLOPS_64, "TOTAL_64_OPS"}, - {RDC_FI_PROF_EVAL_FLOPS_16_PERCENT, "RDC_OPS_16_PER_SIMDCYCLE"}, - {RDC_FI_PROF_EVAL_FLOPS_32_PERCENT, "RDC_OPS_32_PER_SIMDCYCLE"}, - {RDC_FI_PROF_EVAL_FLOPS_64_PERCENT, "RDC_OPS_64_PER_SIMDCYCLE"}, - // metrics below are not divided by time passed - {RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "ValuPipeIssueUtil"}, - {RDC_FI_PROF_SM_ACTIVE, "VALUBusy"}, - {RDC_FI_PROF_OCC_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"}, - {RDC_FI_PROF_OCC_ELAPSED, "GRBM_GUI_ACTIVE"}, // this metric is derived from - // OCC_PER_ACTIVE_CU and ACTIVE_CYCLES - {RDC_FI_PROF_CPC_CPC_STAT_BUSY, "CPC_CPC_STAT_BUSY"}, - {RDC_FI_PROF_CPC_CPC_STAT_IDLE, "CPC_CPC_STAT_IDLE"}, - {RDC_FI_PROF_CPC_CPC_STAT_STALL, "CPC_CPC_STAT_STALL"}, - {RDC_FI_PROF_CPC_CPC_TCIU_BUSY, "CPC_CPC_TCIU_BUSY"}, - {RDC_FI_PROF_CPC_CPC_TCIU_IDLE, "CPC_CPC_TCIU_IDLE"}, - {RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY, "CPC_CPC_UTCL2IU_BUSY"}, - {RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE, "CPC_CPC_UTCL2IU_IDLE"}, - {RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL, "CPC_CPC_UTCL2IU_STALL"}, - {RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE, "CPC_ME1_BUSY_FOR_PACKET_DECODE"}, - {RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY, "CPC_ME1_DC0_SPI_BUSY"}, - {RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION, "CPC_UTCL1_STALL_ON_TRANSLATION"}, - {RDC_FI_PROF_CPC_ALWAYS_COUNT, "CPC_ALWAYS_COUNT"}, - {RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL, "CPC_ADC_VALID_CHUNK_NOT_AVAIL"}, - {RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE, "CPC_ADC_DISPATCH_ALLOC_DONE"}, - {RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END, "CPC_ADC_VALID_CHUNK_END"}, - {RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL, "CPC_SYNC_FIFO_FULL_LEVEL"}, - {RDC_FI_PROF_CPC_SYNC_FIFO_FULL, "CPC_SYNC_FIFO_FULL"}, - {RDC_FI_PROF_CPC_GD_BUSY, "CPC_GD_BUSY"}, - {RDC_FI_PROF_CPC_TG_SEND, "CPC_TG_SEND"}, - {RDC_FI_PROF_CPC_WALK_NEXT_CHUNK, "CPC_WALK_NEXT_CHUNK"}, - {RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI, "CPC_STALLED_BY_SE0_SPI"}, - {RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI, "CPC_STALLED_BY_SE1_SPI"}, - {RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI, "CPC_STALLED_BY_SE2_SPI"}, - {RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI, "CPC_STALLED_BY_SE3_SPI"}, - {RDC_FI_PROF_CPC_LTE_ALL, "CPC_LTE_ALL"}, - {RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY, "CPC_SYNC_WRREQ_FIFO_BUSY"}, - {RDC_FI_PROF_CPC_CANE_BUSY, "CPC_CANE_BUSY"}, - {RDC_FI_PROF_CPC_CANE_STALL, "CPC_CANE_STALL"}, - {RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION, "CPF_CMP_UTCL1_STALL_ON_TRANSLATION"}, - {RDC_FI_PROF_CPF_CPF_STAT_BUSY, "CPF_CPF_STAT_BUSY"}, - {RDC_FI_PROF_CPF_CPF_STAT_IDLE, "CPF_CPF_STAT_IDLE"}, - {RDC_FI_PROF_CPF_CPF_STAT_STALL, "CPF_CPF_STAT_STALL"}, - {RDC_FI_PROF_CPF_CPF_TCIU_BUSY, "CPF_CPF_TCIU_BUSY"}, - {RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "CPF_CPF_TCIU_IDLE"}, - {RDC_FI_PROF_CPF_CPF_TCIU_STALL, "CPF_CPF_TCIU_STALL"}, - {RDC_FI_PROF_SIMD_UTILIZATION, "SIMD_UTILIZATION"}, - {RDC_FI_PROF_UUID, "SQ_WAVES"}, // dummy value, - {RDC_FI_PROF_KFD_ID, "SQ_WAVES"}, // dummy value, - }; +void RdcRocpBase::init_rocp_if_not() { + if (m_is_initialized) { + return; + } + + // ensure initialization is attempted only once, even if it fails + m_is_initialized = true; hsa_status_t status = hsa_init(); if (status != HSA_STATUS_SUCCESS) { @@ -314,6 +320,19 @@ RdcRocpBase::RdcRocpBase() { RDC_LOG(RDC_DEBUG, "Profiler supports " << field_to_metric.size() << " fields"); } +RdcRocpBase::RdcRocpBase() { + // To verify if a field is actually supported by rocprofiler, + // initialization and agent querying are required. + // This initialization is deferred until the first call to rocp_lookup. + // Here, we define the potential fields that rocprofiler may support, + // allowing get_field_ids() to return them. + for (const auto& [k, v] : temp_field_map_k) { + field_to_metric.insert({k, v}); + } + + RDC_LOG(RDC_DEBUG, "Rocprofiler by default supports " << field_to_metric.size() << " fields"); +} + RdcRocpBase::~RdcRocpBase() { hsa_status_t status = HSA_STATUS_SUCCESS; status = hsa_shut_down(); @@ -335,6 +354,8 @@ rdc_status_t RdcRocpBase::rocp_lookup(rdc_gpu_field_t gpu_field, rdc_field_value return RDC_ST_BAD_PARAMETER; } + init_rocp_if_not(); + const bool is_eval_field = (eval_fields.find(field) != eval_fields.end()); const auto start_time = std::chrono::high_resolution_clock::now(); diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc index 24cd27c9aa..fe6f06d0a8 100644 --- a/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rvs/RvsBase.cc @@ -83,10 +83,12 @@ RdcRVSBase::RdcRVSBase() { RDC_LOG(RDC_DEBUG, "RVS CONFIG PATH: " << config_path); - // populate configs - for (auto& ent : std::filesystem::directory_iterator(config_path)) { - if (ent.is_regular_file()) { - _rvs_config_list.push_back(ent.path().string()); + if (std::filesystem::exists(config_path) && std::filesystem::is_directory(config_path)) { + // populate configs + for (auto& ent : std::filesystem::directory_iterator(config_path)) { + if (ent.is_regular_file()) { + _rvs_config_list.push_back(ent.path().string()); + } } }