SWDEV-392942 - Disable rocmtools

Temporarily disable rocmtools because of hsa_shut_down issues

Change-Id: I5e8b6729b8200ccdd5c399862bfc632ba69f884c
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
This commit is contained in:
Galantsev, Dmitrii
2023-02-24 17:28:42 -06:00
parent 8f6bf948cc
commit 90e824c63b
8 changed files with 7 additions and 39 deletions
+1 -1
View File
@@ -51,7 +51,7 @@ option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON)
# When cmake -DBUILD_ROCPTEST=off, it will not build the librdc_rocp.so
# which requires the ROCm profiler.
option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" ON)
option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" OFF)
# When cmake -DBUILD_TESTS=off, it will not build RDC tests.
option(BUILD_TESTS "Build test suite" OFF)
-17
View File
@@ -23,7 +23,6 @@ RDC can run on AMD ROCm supported platforms, please refer to the **List of Suppo
LaTeX (pdfTeX 3.14159265-2.6-1.40.16) ## required to build the latest documentation
gRPC and protoc ## required for communication
libcap-dev ## required to manage the privileges.
rocmtools ## required for profiler metrics
AMD ROCm platform (https://github.com/RadeonOpenCompute/ROCm)
* It is recommended to install the complete AMD ROCm platform.
@@ -31,8 +30,6 @@ RDC can run on AMD ROCm supported platforms, please refer to the **List of Suppo
* At a minimum, these two components are required:
(i) AMD ROCm SMI Library (https://github.com/RadeonOpenCompute/rocm_smi_lib)
(ii) AMD ROCk Kernel driver (https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver)
* For profiler metrics, this component is required:
(i) AMD ROCm Tools (https://github.com/ROCm-Developer-Tools/rocmtools)
## Building gRPC and protoc
**NOTE:** gRPC and protoc compiler must be built when building RDC from source as pre-built packages are not available. When installing RDC from a package, gRPC and protoc will be installed from the package.
@@ -157,10 +154,6 @@ cd /opt/rocm/rdc/bin
./rdci dmon -u --list-all ## list all GPU counters
./rdci dmon -u -i 0 -c 1 -e 100 ## monitor field 100 on gpu 0 for count of 1
./rdci dmon -u -i 0 -c 1 -e 1,2 ## monitor fields 1,2 on gpu 0 for count of 1
# below requires rocmtools to be installed
./rdci dmon -u -i 0 -c 5 -e 700 ## monitor field 700 on gpu 0 for count of 5
# below is only likely to work on MI series GPUs
./rdci dmon -u -i 0 -c 5 -e 700,701,702 ## monitor fields 700,701,702
```
## Troubleshooting rdcd
@@ -181,13 +174,3 @@ RDC_LOG=DEBUG /opt/rocm/rdc/bin/rdcd
RDC_LOG=DEBUG also works on rdci
ERROR, INFO, DEBUG logging levels are supported
- Reading `RDC_FI_PROF_*` crashes rdcd
- All `RDC_FI_PROF_*` metrics return N/A
0. ROCMTools support is in beta.
Reading registers beyond 700-702 range is not guaranteed to work.
1. Does your GPU support selected fields?
Field 700 (`RDC_FI_PROF_ELAPSED_CYCLES`) is supposed to be accessible on most GPUs.
Others are mostly intended for MI series.
2. Is rocmtools installed? Can you find `librocmtools.so`?
+3 -4
View File
@@ -81,10 +81,9 @@ FLD_DESC_ENT(RDC_FI_ECC_UMC_DED, "UMC Double Error Detection",
// This doesn't map to rocprofiler counters directly
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
// See metrics.xml in rocmtools
// TODO: uncomment rest of the fields when implemented
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "PROF_ELAPSED_COUNT", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "PROF_ACTIVE_WAVES", false)
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "PROF_ACTIVE_CYCLES", false)
//FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "PROF_ELAPSED_COUNT", false)
//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "PROF_ACTIVE_WAVES", false)
//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "PROF_ACTIVE_CYCLES", false)
//FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "Active Waves / maximum Active Waves per CU", "PROF_CU_OCCUPANCY", false)
//FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "Active Cycles / total Elapsed Cycles", "PROF_CU_UTILIZATION", false)
//FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "kb fetched from video memory", "PROF_FETCH_SIZE", false)
-2
View File
@@ -28,7 +28,6 @@ THE SOFTWARE.
#include "rdc_lib/RdcModuleMgr.h"
#include "rdc_lib/RdcTelemetry.h"
#include "rdc_lib/impl/RdcRasLib.h"
#include "rdc_lib/impl/RdcRocpLib.h"
#include "rdc_lib/impl/RdcRocrLib.h"
#include "rdc_lib/impl/RdcSmiLib.h"
@@ -51,7 +50,6 @@ class RdcModuleMgrImpl : public RdcModuleMgr {
RdcSmiLibPtr smi_lib_;
RdcMetricFetcherPtr fetcher_;
RdcRocrLibPtr rocr_lib_;
RdcRocpLibPtr rocp_lib_;
};
} // namespace rdc
+1 -3
View File
@@ -29,7 +29,6 @@ THE SOFTWARE.
#include "rdc_lib/RdcTelemetry.h"
#include "rdc_lib/impl/RdcRasLib.h"
#include "rdc_lib/impl/RdcSmiLib.h"
#include "rdc_lib/impl/RdcRocpLib.h"
#include "rdc_lib/RdcMetricFetcher.h"
namespace amd {
@@ -51,8 +50,7 @@ class RdcTelemetryModule : public RdcTelemetry {
uint32_t fields_count);
RdcTelemetryModule(const RdcSmiLibPtr& smi_lib,
const RdcRasLibPtr& ras_module,
const RdcRocpLibPtr& rocp_module);
const RdcRasLibPtr& ras_module);
private:
//< Helper function to dispatch fields to module
+1 -6
View File
@@ -23,7 +23,6 @@ THE SOFTWARE.
#include "rdc_lib/impl/RdcDiagnosticModule.h"
#include "rdc_lib/impl/RdcRasLib.h"
#include "rdc_lib/impl/RdcRocpLib.h"
#include "rdc_lib/impl/RdcRocrLib.h"
#include "rdc_lib/impl/RdcTelemetryModule.h"
@@ -45,13 +44,9 @@ RdcTelemetryPtr RdcModuleMgrImpl::get_telemetry_module() {
ras_lib_.reset(new RdcRasLib("librdc_ras.so"));
}
if (!rocp_lib_) {
rocp_lib_.reset(new RdcRocpLib("librdc_rocp.so"));
}
if (!rdc_telemetry_module_) {
rdc_telemetry_module_.reset(
new RdcTelemetryModule(smi_lib_, ras_lib_, rocp_lib_));
new RdcTelemetryModule(smi_lib_, ras_lib_));
}
return rdc_telemetry_module_;
+1 -5
View File
@@ -94,15 +94,11 @@ rdc_status_t RdcTelemetryModule::rdc_telemetry_fields_unwatch(
RdcTelemetryModule::RdcTelemetryModule(
const RdcSmiLibPtr& smi_lib,
const RdcRasLibPtr& ras_module,
const RdcRocpLibPtr& rocp_module) {
const RdcRasLibPtr& ras_module) {
telemetry_modules_.push_back(smi_lib);
if (ras_module) {
telemetry_modules_.push_back(ras_module);
}
if (rocp_module) {
telemetry_modules_.push_back(rocp_module);
}
auto ite = telemetry_modules_.begin();
for (; ite != telemetry_modules_.end(); ite++) {
-1
View File
@@ -87,7 +87,6 @@ target_link_libraries(${RDCTST}
PUBLIC rdc_bootstrap
PUBLIC rdc
PUBLIC rdc_ras
PUBLIC rdc_rocp
PUBLIC c
PUBLIC stdc++
PUBLIC pthread)