diff --git a/projects/rdc/CMakeLists.txt b/projects/rdc/CMakeLists.txt index 49cb64c6b6..d8c9162f96 100755 --- a/projects/rdc/CMakeLists.txt +++ b/projects/rdc/CMakeLists.txt @@ -51,7 +51,7 @@ option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON) # When cmake -DBUILD_ROCPTEST=off, it will not build the librdc_rocp.so # which requires the Rocm profiler. -option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" ON) +option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" OFF) # When cmake -DBUILD_TESTS=off, it will not build RDC tests. option(BUILD_TESTS "Build test suite" OFF) diff --git a/projects/rdc/README.md b/projects/rdc/README.md index 6810a67612..ff96557a91 100644 --- a/projects/rdc/README.md +++ b/projects/rdc/README.md @@ -23,7 +23,6 @@ RDC can run on AMD ROCm supported platforms, please refer to the **List of Suppo Latex (pdfTeX 3.14159265-2.6-1.40.16) ## required to build the latest documentation gRPC and protoc ## required for communication libcap-dev ## required to manage the privileges. - rocmtools ## required for profiler metrics AMD ROCm platform (https://github.com/RadeonOpenCompute/ROCm) * It is recommended to install the complete AMD ROCm platform. @@ -31,8 +30,6 @@ RDC can run on AMD ROCm supported platforms, please refer to the **List of Suppo * At the minimum, these two components are required (i) AMD ROCm SMI Library (https://github.com/RadeonOpenCompute/rocm_smi_lib) (ii) AMD ROCk Kernel driver (https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver) - * For profiler metrics, this component is required: - (i) AMD ROCm Tools (https://github.com/ROCm-Developer-Tools/rocmtools) ## Building gRPC and protoc **NOTE:** gRPC and protoc compiler must be built when building RDC from source as pre-built packages are not available. When installing RDC from a package, gRPC and protoc will be installed from the package. @@ -157,10 +154,6 @@ cd /opt/rocm/rdc/bin ./rdci dmon -u --list-all ## list all GPU counters ./rdci dmon -u -i 0 -c 1 -e 100 ## monitor field 100 on gpu 0 for count of 1 ./rdci dmon -u -i 0 -c 1 -e 1,2 ## monitor fields 1,2 on gpu 0 for count of 1 -# below requires rocmtools to be installed -./rdci dmon -u -i 0 -c 5 -e 700 ## monitor field 700 on gpu 0 for count of 5 -# below is only likely to work on MI series GPUs -./rdci dmon -u -i 0 -c 5 -e 700,701,702 ## monitor fields 700,701,702 ``` ## Troubleshooting rdcd @@ -181,13 +174,3 @@ RDC_LOG=DEBUG /opt/rocm/rdc/bin/rdcd RDC_LOG=DEBUG also works on rdci ERROR, INFO, DEBUG logging levels are supported - -- Reading `RDC_FI_PROF_*` crashes rdcd -- All `RDC_FI_PROF_*` metrics return N/A - - 0. ROCMTools support is in beta. - Reading registers beyond 700-702 range is not guaranteed to work. - 1. Does your GPU support selected fields? - Field 700 (`RDC_FI_PROF_ELAPSED_CYCLES`) is supposed to be accessible on most GPUs. - Others are mostly intended for MI series. - 2. Is rocmtools installed? Can you find `librocmtools.so`? diff --git a/projects/rdc/common/rdc_field.data b/projects/rdc/common/rdc_field.data index 6985f73a24..668a704f8a 100644 --- a/projects/rdc/common/rdc_field.data +++ b/projects/rdc/common/rdc_field.data @@ -81,10 +81,9 @@ FLD_DESC_ENT(RDC_FI_ECC_UMC_DED, "UMC Double Error Detection", // This doesn't map to rocprofiler counters directly // See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h // See metrics.xml in rocmtools -// TODO: uncomment rest of the fields when implemented -FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "PROF_ELAPSED_COUNT", false) -FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "PROF_ACTIVE_WAVES", false) -FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "PROF_ACTIVE_CYCLES", false) +//FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "PROF_ELAPSED_COUNT", false) +//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "PROF_ACTIVE_WAVES", false) +//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "PROF_ACTIVE_CYCLES", false) //FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "Active Waves / maximum Active Waves per CU", "PROF_CU_OCCUPANCY", false) //FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "Active Cycles / total Elapsed Cycles", "PROF_CU_UTILIZATION", false) //FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "kb fetched from video memory", "PROF_FETCH_SIZE", false) diff --git a/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h b/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h index 0f762729ae..fe5c625ff9 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcModuleMgrImpl.h @@ -28,7 +28,6 @@ THE SOFTWARE. #include "rdc_lib/RdcModuleMgr.h" #include "rdc_lib/RdcTelemetry.h" #include "rdc_lib/impl/RdcRasLib.h" -#include "rdc_lib/impl/RdcRocpLib.h" #include "rdc_lib/impl/RdcRocrLib.h" #include "rdc_lib/impl/RdcSmiLib.h" @@ -51,7 +50,6 @@ class RdcModuleMgrImpl : public RdcModuleMgr { RdcSmiLibPtr smi_lib_; RdcMetricFetcherPtr fetcher_; RdcRocrLibPtr rocr_lib_; - RdcRocpLibPtr rocp_lib_; }; } // namespace rdc diff --git a/projects/rdc/include/rdc_lib/impl/RdcTelemetryModule.h b/projects/rdc/include/rdc_lib/impl/RdcTelemetryModule.h index 59d66ce50e..c9255d6043 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcTelemetryModule.h +++ b/projects/rdc/include/rdc_lib/impl/RdcTelemetryModule.h @@ -29,7 +29,6 @@ THE SOFTWARE. #include "rdc_lib/RdcTelemetry.h" #include "rdc_lib/impl/RdcRasLib.h" #include "rdc_lib/impl/RdcSmiLib.h" -#include "rdc_lib/impl/RdcRocpLib.h" #include "rdc_lib/RdcMetricFetcher.h" namespace amd { @@ -51,8 +50,7 @@ class RdcTelemetryModule : public RdcTelemetry { uint32_t fields_count); RdcTelemetryModule(const RdcSmiLibPtr& smi_lib, - const RdcRasLibPtr& ras_module, - const RdcRocpLibPtr& rocp_module); + const RdcRasLibPtr& ras_module); private: //< Helper function to dispatch fields to module diff --git a/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc index b257637ff1..c61e517bb5 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcModuleMgrImpl.cc @@ -23,7 +23,6 @@ THE SOFTWARE. #include "rdc_lib/impl/RdcDiagnosticModule.h" #include "rdc_lib/impl/RdcRasLib.h" -#include "rdc_lib/impl/RdcRocpLib.h" #include "rdc_lib/impl/RdcRocrLib.h" #include "rdc_lib/impl/RdcTelemetryModule.h" @@ -45,13 +44,9 @@ RdcTelemetryPtr RdcModuleMgrImpl::get_telemetry_module() { ras_lib_.reset(new RdcRasLib("librdc_ras.so")); } - if (!rocp_lib_) { - rocp_lib_.reset(new RdcRocpLib("librdc_rocp.so")); - } - if (!rdc_telemetry_module_) { rdc_telemetry_module_.reset( - new RdcTelemetryModule(smi_lib_, ras_lib_, rocp_lib_)); + new RdcTelemetryModule(smi_lib_, ras_lib_)); } return rdc_telemetry_module_; diff --git a/projects/rdc/rdc_libs/rdc/src/RdcTelemetryModule.cc b/projects/rdc/rdc_libs/rdc/src/RdcTelemetryModule.cc index 264c224eaa..e89f503837 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcTelemetryModule.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcTelemetryModule.cc @@ -94,15 +94,11 @@ rdc_status_t RdcTelemetryModule::rdc_telemetry_fields_unwatch( RdcTelemetryModule::RdcTelemetryModule( const RdcSmiLibPtr& smi_lib, - const RdcRasLibPtr& ras_module, - const RdcRocpLibPtr& rocp_module) { + const RdcRasLibPtr& ras_module) { telemetry_modules_.push_back(smi_lib); if (ras_module) { telemetry_modules_.push_back(ras_module); } - if (rocp_module) { - telemetry_modules_.push_back(rocp_module); - } auto ite = telemetry_modules_.begin(); for (; ite != telemetry_modules_.end(); ite++) { diff --git a/projects/rdc/tests/rdc_tests/CMakeLists.txt b/projects/rdc/tests/rdc_tests/CMakeLists.txt index 09aa46644b..69e821ccf7 100755 --- a/projects/rdc/tests/rdc_tests/CMakeLists.txt +++ b/projects/rdc/tests/rdc_tests/CMakeLists.txt @@ -87,7 +87,6 @@ target_link_libraries(${RDCTST} PUBLIC rdc_bootstrap PUBLIC rdc PUBLIC rdc_ras - PUBLIC rdc_rocp PUBLIC c PUBLIC stdc++ PUBLIC pthread)