d85657e5f2
RAS plugin loaded rocm-smi which is in conflict with amd-smi library Main source of grief was the map 'devInfoTypesStrings' that is defined in both rocm-smi and amd-smi We assume that rocm-smi would get lazy-loaded by RAS library and overwrite symbols defined in amd-smi. devInfoTypesStrings in rocm-smi contains different number of elements, the enums are also different. RDC relies on amd-smi's enums. One such enum is kDevGpuMetrics: rocm-smi: kDevGpuMetrics = 68 amd-smi: kDevGpuMetrics = 75 Example of overlapping map definitions: $ objdump --dynamic-syms /opt/rocm/lib/libamd_smi.so | grep devInfoTypesStrings 00000000003c4980 g DO .data.rel.ro0000000000000008 Base devInfoTypesStrings 00000000003db830 g DO .bss0000000000000030 Base _ZN3amd3smi6Device19devInfoTypesStringsE $ objdump --dynamic-syms /opt/rocm/lib/librocm_smi64.so | grep devInfoTypesStrings 00000000003dc590 g DO .bss0000000000000030 Base _ZN3amd3smi6Device19devInfoTypesStringsE 00000000003c9c68 g DO .data.rel.ro0000000000000008 Base devInfoTypesStrings Change-Id: Ib2f2db32b6abd7ebe84e7807c25581461eb86bae Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
49 línte
2.0 KiB
C
49 línte
2.0 KiB
C
/*
|
|
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
#ifndef INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_
|
|
#define INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_
|
|
|
|
// The diagnostic interface for libraries, for example, AMD-SMI.
|
|
#include <rdc/rdc.h>
|
|
|
|
extern "C" {
|
|
|
|
// The library will implement below function
|
|
|
|
// Which test cases are supported in the library
|
|
rdc_status_t rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cases[MAX_TEST_CASES],
|
|
uint32_t* test_case_count);
|
|
|
|
// Run a specific test case
|
|
|
|
rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case,
|
|
uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count,
|
|
const char* config, size_t config_size,
|
|
rdc_diag_test_result_t* result);
|
|
|
|
rdc_status_t rdc_diag_init(uint64_t flags);
|
|
|
|
rdc_status_t rdc_diag_destroy();
|
|
}
|
|
|
|
#endif // INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_
|