d85657e5f2
RAS plugin loaded rocm-smi, which conflicts with the amd-smi library.
The main source of grief was the map 'devInfoTypesStrings', which is defined in both rocm-smi and amd-smi. We assume that rocm-smi gets lazy-loaded by the RAS library and overwrites symbols defined in amd-smi. devInfoTypesStrings in rocm-smi contains a different number of elements, and the enums are also different. RDC relies on amd-smi's enums. One such enum is kDevGpuMetrics:
rocm-smi: kDevGpuMetrics = 68
amd-smi: kDevGpuMetrics = 75
Example of overlapping map definitions:
$ objdump --dynamic-syms /opt/rocm/lib/libamd_smi.so | grep devInfoTypesStrings
00000000003c4980 g DO .data.rel.ro 0000000000000008 Base devInfoTypesStrings
00000000003db830 g DO .bss 0000000000000030 Base _ZN3amd3smi6Device19devInfoTypesStringsE
$ objdump --dynamic-syms /opt/rocm/lib/librocm_smi64.so | grep devInfoTypesStrings
00000000003dc590 g DO .bss 0000000000000030 Base _ZN3amd3smi6Device19devInfoTypesStringsE
00000000003c9c68 g DO .data.rel.ro 0000000000000008 Base devInfoTypesStrings
Change-Id: Ib2f2db32b6abd7ebe84e7807c25581461eb86bae
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
67 baris
2.6 KiB
C++
67 baris
2.6 KiB
C++
/*
|
|
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
#ifndef INCLUDE_RDC_LIB_IMPL_RDCTELEMETRYMODULE_H_
|
|
#define INCLUDE_RDC_LIB_IMPL_RDCTELEMETRYMODULE_H_
|
|
|
|
#include <list>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
#include "rdc_lib/RdcMetricFetcher.h"
|
|
#include "rdc_lib/RdcTelemetry.h"
|
|
#include "rdc_lib/impl/RdcSmiLib.h"
|
|
|
|
namespace amd {
|
|
namespace rdc {
|
|
|
|
class RdcTelemetryModule : public RdcTelemetry {
|
|
public:
|
|
rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields, uint32_t fields_count,
|
|
rdc_field_value_f callback, void* user_data);
|
|
|
|
rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS],
|
|
uint32_t* field_count);
|
|
|
|
rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields, uint32_t fields_count);
|
|
|
|
rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields, uint32_t fields_count);
|
|
|
|
explicit RdcTelemetryModule(std::list<RdcTelemetryPtr> telemetry_modules);
|
|
|
|
private:
|
|
//< Helper function to dispatch fields to module
|
|
void get_fields_for_module(
|
|
rdc_gpu_field_t* fields, uint32_t fields_count,
|
|
std::map<RdcTelemetryPtr, std::vector<rdc_gpu_field_t>>& fields_in_module,
|
|
std::vector<rdc_gpu_field_value_t>& unsupport_fields); // NOLINT
|
|
std::list<RdcTelemetryPtr> telemetry_modules_;
|
|
std::map<uint32_t, RdcTelemetryPtr> fields_id_module_;
|
|
};
|
|
|
|
// Shared-ownership handle to an RdcTelemetryModule.
using RdcTelemetryModulePtr = std::shared_ptr<RdcTelemetryModule>;
|
|
|
|
} // namespace rdc
|
|
} // namespace amd
|
|
|
|
#endif // INCLUDE_RDC_LIB_IMPL_RDCTELEMETRYMODULE_H_
|