SWDEV-392942 - Disable rocmtools
Temporarily disable rocmtools because of hsa_shut_down issues
Change-Id: I5e8b6729b8200ccdd5c399862bfc632ba69f884c
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
[ROCm/rdc commit: 90e824c63b]
This commit is contained in:
@@ -51,7 +51,7 @@ option(BUILD_ROCRTEST "Build targets for librdc_rocr.so" ON)
|
||||
|
||||
# When cmake -DBUILD_ROCPTEST=off, it will not build the librdc_rocp.so
|
||||
# which requires the ROCm profiler.
|
||||
option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" ON)
|
||||
option(BUILD_ROCPTEST "Build targets for librdc_rocp.so" OFF)
|
||||
|
||||
# When cmake -DBUILD_TESTS=off, it will not build RDC tests.
|
||||
option(BUILD_TESTS "Build test suite" OFF)
|
||||
|
||||
@@ -23,7 +23,6 @@ RDC can run on AMD ROCm supported platforms, please refer to the **List of Suppo
|
||||
LaTeX (pdfTeX 3.14159265-2.6-1.40.16) ## required to build the latest documentation
|
||||
gRPC and protoc ## required for communication
|
||||
libcap-dev ## required to manage the privileges.
|
||||
rocmtools ## required for profiler metrics
|
||||
|
||||
AMD ROCm platform (https://github.com/RadeonOpenCompute/ROCm)
|
||||
* It is recommended to install the complete AMD ROCm platform.
|
||||
@@ -31,8 +30,6 @@ RDC can run on AMD ROCm supported platforms, please refer to the **List of Suppo
|
||||
* At a minimum, these two components are required:
|
||||
(i) AMD ROCm SMI Library (https://github.com/RadeonOpenCompute/rocm_smi_lib)
|
||||
(ii) AMD ROCk Kernel driver (https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver)
|
||||
* For profiler metrics, this component is required:
|
||||
(i) AMD ROCm Tools (https://github.com/ROCm-Developer-Tools/rocmtools)
|
||||
|
||||
## Building gRPC and protoc
|
||||
**NOTE:** gRPC and the protoc compiler must be built when building RDC from source, as pre-built packages are not available. When installing RDC from a package, gRPC and protoc will be installed from the package.
|
||||
@@ -157,10 +154,6 @@ cd /opt/rocm/rdc/bin
|
||||
./rdci dmon -u --list-all ## list all GPU counters
|
||||
./rdci dmon -u -i 0 -c 1 -e 100 ## monitor field 100 on gpu 0 for count of 1
|
||||
./rdci dmon -u -i 0 -c 1 -e 1,2 ## monitor fields 1,2 on gpu 0 for count of 1
|
||||
# below requires rocmtools to be installed
|
||||
./rdci dmon -u -i 0 -c 5 -e 700 ## monitor field 700 on gpu 0 for count of 5
|
||||
# the command below is likely to work only on MI series GPUs
|
||||
./rdci dmon -u -i 0 -c 5 -e 700,701,702 ## monitor fields 700,701,702
|
||||
```
|
||||
|
||||
## Troubleshooting rdcd
|
||||
@@ -181,13 +174,3 @@ RDC_LOG=DEBUG /opt/rocm/rdc/bin/rdcd
|
||||
RDC_LOG=DEBUG also works on rdci
|
||||
|
||||
ERROR, INFO, DEBUG logging levels are supported
|
||||
|
||||
- Reading `RDC_FI_PROF_*` crashes rdcd
|
||||
- All `RDC_FI_PROF_*` metrics return N/A
|
||||
|
||||
0. ROCMTools support is in beta.
|
||||
Reading registers beyond the 700-702 range is not guaranteed to work.
|
||||
1. Does your GPU support selected fields?
|
||||
Field 700 (`RDC_FI_PROF_ELAPSED_CYCLES`) is supposed to be accessible on most GPUs.
|
||||
Others are mostly intended for MI series.
|
||||
2. Is rocmtools installed? Can you find `librocmtools.so`?
|
||||
|
||||
@@ -81,10 +81,9 @@ FLD_DESC_ENT(RDC_FI_ECC_UMC_DED, "UMC Double Error Detection",
|
||||
// This doesn't map to rocprofiler counters directly
|
||||
// See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h
|
||||
// See metrics.xml in rocmtools
|
||||
// TODO: uncomment the rest of the fields when implemented
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "PROF_ELAPSED_COUNT", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "PROF_ACTIVE_WAVES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "PROF_ACTIVE_CYCLES", false)
|
||||
//FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "PROF_ELAPSED_COUNT", false)
|
||||
//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "PROF_ACTIVE_WAVES", false)
|
||||
//FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "PROF_ACTIVE_CYCLES", false)
|
||||
//FLD_DESC_ENT(RDC_FI_PROF_CU_OCCUPANCY, "Active Waves / maximum Active Waves per CU", "PROF_CU_OCCUPANCY", false)
|
||||
//FLD_DESC_ENT(RDC_FI_PROF_CU_UTILIZATION, "Active Cycles / total Elapsed Cycles", "PROF_CU_UTILIZATION", false)
|
||||
//FLD_DESC_ENT(RDC_FI_PROF_FETCH_SIZE, "kb fetched from video memory", "PROF_FETCH_SIZE", false)
|
||||
|
||||
@@ -28,7 +28,6 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/RdcModuleMgr.h"
|
||||
#include "rdc_lib/RdcTelemetry.h"
|
||||
#include "rdc_lib/impl/RdcRasLib.h"
|
||||
#include "rdc_lib/impl/RdcRocpLib.h"
|
||||
#include "rdc_lib/impl/RdcRocrLib.h"
|
||||
#include "rdc_lib/impl/RdcSmiLib.h"
|
||||
|
||||
@@ -51,7 +50,6 @@ class RdcModuleMgrImpl : public RdcModuleMgr {
|
||||
RdcSmiLibPtr smi_lib_;
|
||||
RdcMetricFetcherPtr fetcher_;
|
||||
RdcRocrLibPtr rocr_lib_;
|
||||
RdcRocpLibPtr rocp_lib_;
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
|
||||
@@ -29,7 +29,6 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/RdcTelemetry.h"
|
||||
#include "rdc_lib/impl/RdcRasLib.h"
|
||||
#include "rdc_lib/impl/RdcSmiLib.h"
|
||||
#include "rdc_lib/impl/RdcRocpLib.h"
|
||||
#include "rdc_lib/RdcMetricFetcher.h"
|
||||
|
||||
namespace amd {
|
||||
@@ -51,8 +50,7 @@ class RdcTelemetryModule : public RdcTelemetry {
|
||||
uint32_t fields_count);
|
||||
|
||||
RdcTelemetryModule(const RdcSmiLibPtr& smi_lib,
|
||||
const RdcRasLibPtr& ras_module,
|
||||
const RdcRocpLibPtr& rocp_module);
|
||||
const RdcRasLibPtr& ras_module);
|
||||
|
||||
private:
|
||||
//< Helper function to dispatch fields to module
|
||||
|
||||
@@ -23,7 +23,6 @@ THE SOFTWARE.
|
||||
|
||||
#include "rdc_lib/impl/RdcDiagnosticModule.h"
|
||||
#include "rdc_lib/impl/RdcRasLib.h"
|
||||
#include "rdc_lib/impl/RdcRocpLib.h"
|
||||
#include "rdc_lib/impl/RdcRocrLib.h"
|
||||
#include "rdc_lib/impl/RdcTelemetryModule.h"
|
||||
|
||||
@@ -45,13 +44,9 @@ RdcTelemetryPtr RdcModuleMgrImpl::get_telemetry_module() {
|
||||
ras_lib_.reset(new RdcRasLib("librdc_ras.so"));
|
||||
}
|
||||
|
||||
if (!rocp_lib_) {
|
||||
rocp_lib_.reset(new RdcRocpLib("librdc_rocp.so"));
|
||||
}
|
||||
|
||||
if (!rdc_telemetry_module_) {
|
||||
rdc_telemetry_module_.reset(
|
||||
new RdcTelemetryModule(smi_lib_, ras_lib_, rocp_lib_));
|
||||
new RdcTelemetryModule(smi_lib_, ras_lib_));
|
||||
}
|
||||
|
||||
return rdc_telemetry_module_;
|
||||
|
||||
@@ -94,15 +94,11 @@ rdc_status_t RdcTelemetryModule::rdc_telemetry_fields_unwatch(
|
||||
|
||||
RdcTelemetryModule::RdcTelemetryModule(
|
||||
const RdcSmiLibPtr& smi_lib,
|
||||
const RdcRasLibPtr& ras_module,
|
||||
const RdcRocpLibPtr& rocp_module) {
|
||||
const RdcRasLibPtr& ras_module) {
|
||||
telemetry_modules_.push_back(smi_lib);
|
||||
if (ras_module) {
|
||||
telemetry_modules_.push_back(ras_module);
|
||||
}
|
||||
if (rocp_module) {
|
||||
telemetry_modules_.push_back(rocp_module);
|
||||
}
|
||||
|
||||
auto ite = telemetry_modules_.begin();
|
||||
for (; ite != telemetry_modules_.end(); ite++) {
|
||||
|
||||
@@ -87,7 +87,6 @@ target_link_libraries(${RDCTST}
|
||||
PUBLIC rdc_bootstrap
|
||||
PUBLIC rdc
|
||||
PUBLIC rdc_ras
|
||||
PUBLIC rdc_rocp
|
||||
PUBLIC c
|
||||
PUBLIC stdc++
|
||||
PUBLIC pthread)
|
||||
|
||||
Referens i nytt ärende
Block a user