From 6b246dcf4b9cd6275ef5e0b659ce303620fac0c1 Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Fri, 24 Jul 2020 19:40:48 -0500 Subject: [PATCH] rdc_field_t replaces uint32_t; centralize field data Make the RDC use the new rdc_field_t enum instead of uint32_t. This will help prevent invalid field types from being passed in. Also, centralize where data related to fields is kept. This will reduce the number of places where changes are required each time a new field is added. Finally, cleaned up several cpplint issues. Change-Id: I48e4512e18c164411d8b09ae3d4bed99fba359ec [ROCm/rdc commit: 5950ebadc4542d0d0eb3458bdb9995d64945ce0b] --- projects/rdc/common/rdc_field_data.data | 39 ++++++ projects/rdc/common/rdc_fields_supported.cc | 72 ++++++++++++ projects/rdc/common/rdc_fields_supported.h | 50 ++++++++ projects/rdc/example/field_value_example.cc | 2 +- projects/rdc/include/rdc/rdc.h | 111 +++++++----------- .../rdc/include/rdc_lib/RdcCacheManager.h | 12 +- projects/rdc/include/rdc_lib/RdcException.h | 6 +- .../rdc/include/rdc_lib/RdcGroupSettings.h | 8 +- projects/rdc/include/rdc_lib/RdcHandler.h | 12 +- .../rdc/include/rdc_lib/RdcLibraryLoader.h | 6 +- projects/rdc/include/rdc_lib/RdcLogger.h | 6 +- .../rdc/include/rdc_lib/RdcMetricFetcher.h | 9 +- .../rdc/include/rdc_lib/RdcMetricsUpdater.h | 6 +- projects/rdc/include/rdc_lib/RdcWatchTable.h | 6 +- .../rdc_lib/impl/RdcCacheManagerImpl.h | 16 +-- .../include/rdc_lib/impl/RdcEmbeddedHandler.h | 12 +- .../rdc_lib/impl/RdcGroupSettingsImpl.h | 9 +- .../rdc_lib/impl/RdcMetricFetcherImpl.h | 13 +- .../rdc_lib/impl/RdcMetricsUpdaterImpl.h | 8 +- .../rdc_lib/impl/RdcStandaloneHandler.h | 12 +- .../include/rdc_lib/impl/RdcWatchTableImpl.h | 15 +-- projects/rdc/include/rdc_lib/rdc_common.h | 13 +- projects/rdc/rdc_libs/CMakeLists.txt | 7 +- .../rdc_libs/bootstrap/src/RdcBootStrap.cc | 35 ++---- .../rdc_libs/rdc/src/RdcCacheManagerImpl.cc | 6 +- .../rdc_libs/rdc/src/RdcEmbeddedHandler.cc | 13 +- 
.../rdc_libs/rdc/src/RdcGroupSettingsImpl.cc | 6 +- .../rdc_libs/rdc/src/RdcMetricFetcherImpl.cc | 18 +-- .../rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc | 2 +- .../rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc | 8 +- .../rdc_client/src/RdcStandaloneHandler.cc | 13 +- projects/rdc/rdci/CMakeLists.txt | 2 + projects/rdc/rdci/include/RdciDmonSubSystem.h | 2 +- projects/rdc/rdci/include/RdciSubSystem.h | 2 - projects/rdc/rdci/src/RdciDmonSubSystem.cc | 47 ++++---- .../rdc/rdci/src/RdciFieldGroupSubSystem.cc | 8 +- projects/rdc/rdci/src/RdciSubSystem.cc | 29 +---- projects/rdc/server/src/rdc_api_service.cc | 15 +-- .../rdc_tests/functional/rdci_discovery.cc | 29 ++--- .../tests/rdc_tests/functional/rdci_dmon.cc | 34 +++--- .../rdc_tests/functional/rdci_fieldgroup.cc | 47 ++++---- .../tests/rdc_tests/functional/rdci_group.cc | 47 +++----- .../tests/rdc_tests/functional/rdci_stats.cc | 34 +++--- 43 files changed, 459 insertions(+), 388 deletions(-) create mode 100644 projects/rdc/common/rdc_field_data.data create mode 100644 projects/rdc/common/rdc_fields_supported.cc create mode 100644 projects/rdc/common/rdc_fields_supported.h diff --git a/projects/rdc/common/rdc_field_data.data b/projects/rdc/common/rdc_field_data.data new file mode 100644 index 0000000000..3b27d56b6d --- /dev/null +++ b/projects/rdc/common/rdc_field_data.data @@ -0,0 +1,39 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +FLD_DESC_ENT(RDC_FI_INVALID, "Unknown/Invalid field", "INVALID") +FLD_DESC_ENT(RDC_FI_GPU_COUNT, "GPU count in the system", "GPU_COUNT") +FLD_DESC_ENT(RDC_FI_DEV_NAME, "Name of the device", "DEV_NAME") +FLD_DESC_ENT(RDC_FI_GPU_CLOCK, "Current GPU clock frequencies.", "GPU_CLOCK") +FLD_DESC_ENT(RDC_FI_MEM_CLOCK, "Current Memory clock frequencies.", "MEM_CLOCK") +FLD_DESC_ENT(RDC_FI_MEMORY_TEMP, "Memory temperature in millidegrees Celsius.", "MEMORY_TEMP") +FLD_DESC_ENT(RDC_FI_GPU_TEMP, "GPU temperature in millidegrees Celsius.", "GPU_TEMP") +FLD_DESC_ENT(RDC_FI_POWER_USAGE, "Power usage in microwatts.", "POWER_USAGE") +FLD_DESC_ENT(RDC_FI_PCIE_TX, "PCIe Tx utilization in bytes/second.", "PCIE_TX") +FLD_DESC_ENT(RDC_FI_PCIE_RX, "PCIe Rx utilization in bytes/second.", "PCIE_RX") +FLD_DESC_ENT(RDC_FI_GPU_UTIL, "GPU busy percentage.", "GPU_UTIL") +FLD_DESC_ENT(RDC_FI_GPU_MEMORY_USAGE, + "Memory usage of the GPU instance in bytes.", "GPU_MEMORY_USAGE") +FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", "GPU_MEMORY_TOTAL") +FLD_DESC_ENT(RDC_FI_ECC_CORRECT_TOTAL, "Accumulated correctable ECC errors.", "ECC_CORRECT") +FLD_DESC_ENT(RDC_FI_ECC_UNCORRECT_TOTAL, + "Accumulated uncorrectable ECC errors.", "ECC_UNCORRECT") + diff --git a/projects/rdc/common/rdc_fields_supported.cc b/projects/rdc/common/rdc_fields_supported.cc new file mode 100644 index 0000000000..83c676d852 --- /dev/null +++ b/projects/rdc/common/rdc_fields_supported.cc @@ -0,0 +1,72 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#include <assert.h> + +#include <string> + +#include "common/rdc_fields_supported.h" +#include "rdc/rdc.h" +namespace amd { +namespace rdc { + +#define FLD_DESC_ENT(ID, DESC, LABEL) \ + {static_cast<uint32_t>(ID), {#ID, (DESC), (LABEL)}}, +static const fld_id2name_map_t field_id_to_descript = { + #include "common/rdc_field_data.data" +}; +#undef FLD_DESC_ENT + +#define FLD_DESC_ENT(ID, DESC, LABEL) {#ID, (ID)}, +static fld_name2id_map_t field_name_to_id = { + #include "common/rdc_field_data.data" // NOLINT +}; +#undef FLD_DESC_ENT + + + +amd::rdc::fld_id2name_map_t & +get_field_id_description_from_id(void) { + return field_id_to_descript; +} + +bool get_field_id_from_name(const std::string name, rdc_field_t *value) { + assert(value != nullptr); + auto id = field_name_to_id.find(name); + if (id == field_name_to_id.end()) { + return false; + } + + *value = static_cast<rdc_field_t>(id->second); + return true; +} + +bool is_field_valid(rdc_field_t field_id) { + if (field_id == RDC_FI_INVALID) { + return false; + } + return field_id_to_descript.find(static_cast<uint32_t>(field_id)) != + field_id_to_descript.end(); +} + + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/common/rdc_fields_supported.h b/projects/rdc/common/rdc_fields_supported.h new file mode 100644 index 0000000000..02c0ebbd09 --- /dev/null +++ b/projects/rdc/common/rdc_fields_supported.h @@ -0,0 +1,50 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#ifndef COMMON_RDC_FIELDS_SUPPORTED_H_ +#define COMMON_RDC_FIELDS_SUPPORTED_H_ + +#include <string> +#include <unordered_map> + +#include "rdc/rdc.h" + +namespace amd { +namespace rdc { + +typedef struct { + std::string enum_name; + std::string description; + std::string label; +} field_id_descript; + +typedef const std::unordered_map<uint32_t, field_id_descript> + fld_id2name_map_t; +typedef std::unordered_map<std::string, rdc_field_t> fld_name2id_map_t; + +bool get_field_id_from_name(const std::string name, rdc_field_t *value); +fld_id2name_map_t & get_field_id_description_from_id(void); // NOLINT +bool is_field_valid(rdc_field_t field_id); + +} // namespace rdc +} // namespace amd + +#endif // COMMON_RDC_FIELDS_SUPPORTED_H_ diff --git a/projects/rdc/example/field_value_example.cc b/projects/rdc/example/field_value_example.cc index b742c6f958..8f7a1b6ae6 100644 --- a/projects/rdc/example/field_value_example.cc +++ b/projects/rdc/example/field_value_example.cc @@ -129,7 +129,7 @@ int main(int, char **) { // Create the field groups to monitor POWER and TEMP rdc_field_grp_t field_group_id; - uint32_t field_ids[2]; + rdc_field_t field_ids[2]; field_ids[0] = RDC_FI_GPU_MEMORY_USAGE; field_ids[1] = RDC_FI_POWER_USAGE; result = rdc_group_field_create(rdc_handle, 2, diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index ff5a2e5cf8..0491b34604 100755 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -128,77 +128,46 @@ typedef enum { #define RDC_MAX_NUM_FIELD_GROUPS 64 /** - * Memory usage of the GPU instance + * These enums are used to specify a particular field to be retrieved. 
*/ -#define RDC_FI_GPU_MEMORY_USAGE 525 +typedef enum { + RDC_FI_INVALID = 0, //!< Invalid field value + //!< @brief Identifier fields + RDC_FI_GPU_COUNT = 1, //!< GPU count in the system + RDC_FI_DEV_NAME, //!< Name of the device -/** - * Total memory of the GPU instance - */ -#define RDC_FI_GPU_MEMORY_TOTAL 580 + /* + * @brief Frequency related fields + */ + RDC_FI_GPU_CLOCK = 100, //!< The current clock for the GPU + RDC_FI_MEM_CLOCK, //!< Clock for the memory -/** - * Power usage for the device - */ -#define RDC_FI_POWER_USAGE 155 + /* + * @brief Physical monitor fields + */ + RDC_FI_MEMORY_TEMP = 200, //!< Memory temperature for the device + RDC_FI_GPU_TEMP, //!< Current temperature for the device + RDC_FI_POWER_USAGE = 300, //!< Power usage for the device -/** - * The current clock for the GPU - */ -#define RDC_FI_GPU_CLOCK 100 + /* + * @brief PCIe related fields + */ + RDC_FI_PCIE_TX = 400, //!< PCIe Tx utilization information + RDC_FI_PCIE_RX, //!< PCIe Rx utilization information -/** - * Clock for the memory - */ -#define RDC_FI_MEM_CLOCK 101 - -/** - * PCIe Tx utilization information - */ -#define RDC_FI_PCIE_TX 200 - -/** - * PCIe Rx utilization information - */ -#define RDC_FI_PCIE_RX 201 - - -/** - * GPU Utilization - */ -#define RDC_FI_GPU_UTIL 203 - -/** - * Accumulated correctable ECC errors - */ -#define RDC_FI_ECC_CORRECT_TOTAL 312 - -/** - * Accumulated uncorrectable ECC errors - */ -#define RDC_FI_ECC_UNCORRECT_TOTAL 313 - -/** - * Memory temperature for the device - */ -#define RDC_FI_MEMORY_TEMP 140 - -/** - * Current temperature for the device - */ -#define RDC_FI_GPU_TEMP 150 - - -/** - * GPU count in the system - */ -#define RDC_FI_GPU_COUNT 4 - -/** - * Name of the device - */ -#define RDC_FI_DEV_NAME 50 + /* + * @brief GPU usage related fields + */ + RDC_FI_GPU_UTIL = 500, //!< GPU Utilization + RDC_FI_GPU_MEMORY_USAGE, //!< Memory usage of the GPU instance + RDC_FI_GPU_MEMORY_TOTAL, //!< Total memory of the GPU instance + /** + * @brief 
ECC related fields + */ + RDC_FI_ECC_CORRECT_TOTAL = 600, //!< Accumulated correctable ECC errors + RDC_FI_ECC_UNCORRECT_TOTAL, //!< Accumulated uncorrectable ECC errors +} rdc_field_t; /** * @brief handlers used in various rdc calls @@ -273,7 +242,7 @@ typedef struct { * @brief The structure to store the field value */ typedef struct { - uint32_t field_id; //!< The field id of the value + rdc_field_t field_id; //!< The field id of the value int status; //!< RDC_ST_OK or error status uint64_t ts; //!< Timestamp in usec since 1970 rdc_field_type_t type; //!< The field type @@ -294,7 +263,7 @@ typedef struct { /** * The list of fields in the group */ - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; } rdc_field_group_info_t; /** @@ -647,7 +616,7 @@ rdc_status_t rdc_group_gpu_destroy(rdc_handle_t p_rdc_handle, * @retval ::RDC_ST_OK is returned upon successful call. */ rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle, - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id); /** @@ -743,7 +712,7 @@ rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle, * @retval ::RDC_ST_OK is returned upon successful call. */ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, rdc_field_value* value); + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value); /** * @brief Request a history cached field of a GPU @@ -767,7 +736,7 @@ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, * @retval ::RDC_ST_OK is returned upon successful call. 
*/ rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, + uint32_t gpu_index, rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value); /** @@ -806,7 +775,7 @@ const char* rdc_status_string(rdc_status_t status); * * @retval The string to describe the field. */ -const char* field_id_string(uint32_t field_id); +const char* field_id_string(rdc_field_t field_id); #ifdef __cplusplus } diff --git a/projects/rdc/include/rdc_lib/RdcCacheManager.h b/projects/rdc/include/rdc_lib/RdcCacheManager.h index 0052639d26..1cedc06c1b 100644 --- a/projects/rdc/include/rdc_lib/RdcCacheManager.h +++ b/projects/rdc/include/rdc_lib/RdcCacheManager.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_RDCCACHEMANAGER_H_ -#define RDC_LIB_RDCCACHEMANAGER_H_ +#ifndef INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_ +#define INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_ #include #include @@ -36,13 +36,13 @@ namespace rdc { class RdcCacheManager { public: virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) = 0; + rdc_field_t field, rdc_field_value* value) = 0; virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) = 0; virtual rdc_status_t rdc_update_cache(uint32_t gpu_index, const rdc_field_value& value) = 0; - virtual rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id, + virtual rdc_status_t evict_cache(uint32_t gpu_index, rdc_field_t field_id, uint64_t max_keep_samples, double max_keep_age) = 0; virtual std::string get_cache_stats() = 0; @@ -69,4 +69,4 @@ typedef std::shared_ptr RdcCacheManagerPtr; } // namespace rdc 
} // namespace amd -#endif // RDC_LIB_RDCCACHEMANAGER_H_ +#endif // INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcException.h b/projects/rdc/include/rdc_lib/RdcException.h index 62225c12ef..9c6b320cfc 100644 --- a/projects/rdc/include/rdc_lib/RdcException.h +++ b/projects/rdc/include/rdc_lib/RdcException.h @@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDCI_INCLUDE_RDCEXCEPTION_H_ -#define RDCI_INCLUDE_RDCEXCEPTION_H_ +#ifndef INCLUDE_RDC_LIB_RDCEXCEPTION_H_ +#define INCLUDE_RDC_LIB_RDCEXCEPTION_H_ #include #include @@ -45,5 +45,5 @@ class RdcException : public std::exception { } // namespace rdc } // namespace amd -#endif // RDCI_INCLUDE_RDCEXCEPTION_H_ +#endif // INCLUDE_RDC_LIB_RDCEXCEPTION_H_ diff --git a/projects/rdc/include/rdc_lib/RdcGroupSettings.h b/projects/rdc/include/rdc_lib/RdcGroupSettings.h index 89d6f4ed22..4affe16b9e 100644 --- a/projects/rdc/include/rdc_lib/RdcGroupSettings.h +++ b/projects/rdc/include/rdc_lib/RdcGroupSettings.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCGROUPSETTINGS_H_ -#define RDC_LIB_RDCGROUPSETTINGS_H_ +#ifndef INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_ +#define INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_ #include #include "rdc_lib/rdc_common.h" @@ -45,7 +45,7 @@ class RdcGroupSettings { virtual rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) = 0; virtual rdc_status_t rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) = 0; @@ -64,4 +64,4 @@ const uint32_t JOB_FIELD_ID = 0; } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCGROUPSETTINGS_H_ +#endif // INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_ diff --git a/projects/rdc/include/rdc_lib/RdcHandler.h b/projects/rdc/include/rdc_lib/RdcHandler.h index a89a9a8e89..08a570b7f1 100644 --- a/projects/rdc/include/rdc_lib/RdcHandler.h +++ b/projects/rdc/include/rdc_lib/RdcHandler.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCHANDLER_H_ -#define RDC_LIB_RDCHANDLER_H_ +#ifndef INCLUDE_RDC_LIB_RDCHANDLER_H_ +#define INCLUDE_RDC_LIB_RDCHANDLER_H_ #include "rdc_lib/rdc_common.h" #include "rdc/rdc.h" @@ -52,7 +52,7 @@ class RdcHandler { virtual rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId, uint32_t gpu_index) = 0; virtual rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) = 0; virtual rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, @@ -73,9 +73,9 @@ class RdcHandler { rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) = 0; virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) = 0; + rdc_field_t field, rdc_field_value* value) = 0; virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) = 0; virtual rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) = 0; @@ -89,4 +89,4 @@ class RdcHandler { } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCHANDLER_H_ +#endif // INCLUDE_RDC_LIB_RDCHANDLER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcLibraryLoader.h b/projects/rdc/include/rdc_lib/RdcLibraryLoader.h index 7a3d7fe2be..2b3c1a4352 100644 --- a/projects/rdc/include/rdc_lib/RdcLibraryLoader.h +++ b/projects/rdc/include/rdc_lib/RdcLibraryLoader.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCLIBRARYLOADER_H_ -#define RDC_LIB_RDCLIBRARYLOADER_H_ +#ifndef INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_ +#define INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_ #include #include #include @@ -83,4 +83,4 @@ template rdc_status_t RdcLibraryLoader::load(const char* filename, } // namespace amd -#endif // RDC_LIB_RDCLIBRARYLOADER_H_ +#endif // INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcLogger.h b/projects/rdc/include/rdc_lib/RdcLogger.h index b2d891b73e..ead06106be 100644 --- a/projects/rdc/include/rdc_lib/RdcLogger.h +++ b/projects/rdc/include/rdc_lib/RdcLogger.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_RDCLOGGER_H_ -#define RDC_LIB_RDCLOGGER_H_ +#ifndef INCLUDE_RDC_LIB_RDCLOGGER_H_ +#define INCLUDE_RDC_LIB_RDCLOGGER_H_ #include #include #include // NOLINT @@ -56,4 +56,4 @@ class RdcLogger { } // namespace amd -#endif // RDC_LIB_RDCLOGGER_H_ +#endif // INCLUDE_RDC_LIB_RDCLOGGER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcMetricFetcher.h b/projects/rdc/include/rdc_lib/RdcMetricFetcher.h index c4c0cd6122..81ef756832 100644 --- a/projects/rdc/include/rdc_lib/RdcMetricFetcher.h +++ b/projects/rdc/include/rdc_lib/RdcMetricFetcher.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCMETRICFETCHER_H_ -#define RDC_LIB_RDCMETRICFETCHER_H_ +#ifndef INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_ +#define INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_ #include #include "rdc_lib/rdc_common.h" @@ -33,8 +33,7 @@ namespace rdc { class RdcMetricFetcher { public: virtual rdc_status_t fetch_smi_field(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) = 0; - virtual bool is_field_valid(uint32_t field_id) const = 0; + rdc_field_t field_id, rdc_field_value* value) = 0; virtual ~RdcMetricFetcher() {} }; @@ -43,4 +42,4 @@ typedef std::shared_ptr RdcMetricFetcherPtr; } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCMETRICFETCHER_H_ +#endif // INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h b/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h index d724c483e5..681855aff6 100644 --- a/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h +++ b/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_RDCMETRICSUPDATER_H_ -#define RDC_LIB_RDCMETRICSUPDATER_H_ +#ifndef INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ +#define INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ #include @@ -39,4 +39,4 @@ typedef std::shared_ptr RdcMetricsUpdaterPtr; } // namespace amd -#endif // RDC_LIB_RDCMETRICSUPDATER_H_ +#endif // INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcWatchTable.h b/projects/rdc/include/rdc_lib/RdcWatchTable.h index 1597f73011..ecb6f0823f 100644 --- a/projects/rdc/include/rdc_lib/RdcWatchTable.h +++ b/projects/rdc/include/rdc_lib/RdcWatchTable.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCWATCHTABLE_H_ -#define RDC_LIB_RDCWATCHTABLE_H_ +#ifndef INCLUDE_RDC_LIB_RDCWATCHTABLE_H_ +#define INCLUDE_RDC_LIB_RDCWATCHTABLE_H_ #include #include @@ -57,4 +57,4 @@ typedef std::shared_ptr RdcWatchTablePtr; } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCWATCHTABLE_H_ +#endif // INCLUDE_RDC_LIB_RDCWATCHTABLE_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h b/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h index 6e928fb4a8..9ef38b9502 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ -#define RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ #include #include // NOLINT(build/c++11) @@ -80,13 +80,13 @@ typedef std::map RdcJobStatsCache; class RdcCacheManagerImpl: public RdcCacheManager { public: rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) override; - rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, - uint64_t since_time_stamp, uint64_t *next_since_time_stamp, - rdc_field_value* value) override; + rdc_field_t field, rdc_field_value* value) override; + rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, + rdc_field_t field, uint64_t since_time_stamp, + uint64_t *next_since_time_stamp, rdc_field_value* value) override; rdc_status_t rdc_update_cache(uint32_t gpu_index, const rdc_field_value& value) override; - rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id, + rdc_status_t evict_cache(uint32_t gpu_index, rdc_field_t field_id, uint64_t max_keep_samples, double max_keep_age) override; std::string 
get_cache_stats() override; @@ -120,4 +120,4 @@ class RdcCacheManagerImpl: public RdcCacheManager { } // namespace amd -#endif // RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h index 2b09664c15..0541be031f 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h +++ b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ -#define RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ #include // NOLINT(build/c++11) #include "rdc_lib/RdcHandler.h" @@ -57,7 +57,7 @@ class RdcEmbeddedHandler: public RdcHandler { rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId, uint32_t gpu_index) override; rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) override; rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, @@ -78,9 +78,9 @@ class RdcEmbeddedHandler: public RdcHandler { rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) override; + rdc_field_t field, rdc_field_value* value) override; rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) override; rdc_status_t rdc_field_unwatch(rdc_gpu_group_t 
group_id, rdc_field_grp_t field_group_id) override; @@ -108,4 +108,4 @@ extern "C" { amd::rdc::RdcHandler *make_handler(rdc_operation_mode_t op_mode); } -#endif // RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h b/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h index d616df5762..c57fe715dc 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h @@ -19,9 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ -#define RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ - +#ifndef INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ #include #include @@ -46,7 +45,7 @@ class RdcGroupSettingsImpl: public RdcGroupSettings { rdc_gpu_group_t group_id_list[], uint32_t* count) override; rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) override; rdc_status_t rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) override; @@ -71,4 +70,4 @@ class RdcGroupSettingsImpl: public RdcGroupSettings { } // namespace amd -#endif // RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h b/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h index 154e466f11..4267acd845 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ -#define RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ #include // NOLINT(build/c++11) #include // NOLINT(build/c++11) @@ -52,18 +52,17 @@ struct MetricTask { class RdcMetricFetcherImpl: public RdcMetricFetcher { public: rdc_status_t fetch_smi_field(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) override; - bool is_field_valid(uint32_t field_id) const override; + rdc_field_t field_id, rdc_field_value* value) override; RdcMetricFetcherImpl(); ~RdcMetricFetcherImpl(); private: uint64_t now(); void get_ecc_error(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value); + rdc_field_t field_id, rdc_field_value* value); //!< return true if starting async_get bool async_get_pcie_throughput(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value); + rdc_field_t field_id, rdc_field_value* value); void get_pcie_throughput(const RdcFieldKey& key); //!< Async metric retreive @@ -78,4 +77,4 @@ class RdcMetricFetcherImpl: public RdcMetricFetcher { } // namespace rdc } // namespace amd -#endif // RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h b/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h index 8612300db6..72fd9d1661 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h @@ -19,10 +19,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ -#define RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ -#include +#include // NOLINT(build/c++11) #include #include "rdc_lib/RdcMetricsUpdater.h" #include "rdc_lib/RdcWatchTable.h" @@ -46,4 +46,4 @@ class RdcMetricsUpdaterImpl: public RdcMetricsUpdater { } // namespace rdc } // namespace amd -#endif // RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h index 30267ee9ac..439bbbe128 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h +++ b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ -#define RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ #include #include #include "rdc.grpc.pb.h" // NOLINT @@ -53,7 +53,7 @@ class RdcStandaloneHandler: public RdcHandler { rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId, uint32_t gpu_index) override; rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) override; rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, @@ -74,9 +74,9 @@ class RdcStandaloneHandler: public RdcHandler { rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) override; + rdc_field_t field, rdc_field_value* value) override; rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) override; rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) override; @@ -107,4 +107,4 @@ extern "C" { const char* root_ca, const char* client_cert, const char* client_key); } -#endif // RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h b/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h index 894860f562..134cec02fd 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ -#define RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ #include #include @@ -86,7 +86,8 @@ class RdcWatchTableImpl : public RdcWatchTable { private: //!< Helper function to Update the fields_in_table when unwatch tables - rdc_status_t update_field_in_table_when_unwatch(const RdcFieldKey& entry); + rdc_status_t update_field_in_table_when_unwatch( + const RdcFieldGroupKey& entry); //!< Helper function to clean up the watch table and cache void clean_up(); @@ -99,15 +100,15 @@ class RdcWatchTableImpl : public RdcWatchTable { rdc_field_grp_t field_group_id, std::vector & fields); // NOLINT - bool is_job_watch_field(uint32_t gpu_index, uint32_t field_id, - std::string& job_id) const; // NOLINT + bool is_job_watch_field(uint32_t gpu_index, rdc_field_t field_id, + std::string& job_id) const; // NOLINT RdcGroupSettingsPtr group_settings_; RdcCacheManagerPtr cache_mgr_; RdcMetricFetcherPtr metric_fetcher_; //!< The watch table to store the watch settings. - std::map watch_table_; + std::map watch_table_; //!< pairs std::map job_watch_table_; @@ -129,4 +130,4 @@ class RdcWatchTableImpl : public RdcWatchTable { } // namespace amd -#endif // RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/rdc_common.h b/projects/rdc/include/rdc_lib/rdc_common.h index 46a1a01e1c..b459741f1b 100644 --- a/projects/rdc/include/rdc_lib/rdc_common.h +++ b/projects/rdc/include/rdc_lib/rdc_common.h @@ -20,12 +20,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDC_COMMON_H_ -#define RDC_LIB_RDC_COMMON_H_ +#ifndef INCLUDE_RDC_LIB_RDC_COMMON_H_ +#define INCLUDE_RDC_LIB_RDC_COMMON_H_ #include #include #include +#include "rdc/rdc.h" + #define RDC_ERROR 0 #define RDC_INFO 1 #define RDC_DEBUG 2 @@ -40,7 +42,10 @@ THE SOFTWARE. } while (0) // -typedef std::pair RdcFieldKey; +typedef std::pair RdcFieldKey; + +// +typedef std::pair RdcFieldGroupKey; //!< The gauge metrics do not require aggregations typedef std::map rdc_gpu_gauges_t; @@ -62,4 +67,4 @@ typedef std::map rdc_gpu_gauges_t; char *strncpy_with_null(char *dest, const char *src, size_t n); -#endif // RDC_LIB_RDC_COMMON_H_ +#endif // INCLUDE_RDC_LIB_RDC_COMMON_H_ diff --git a/projects/rdc/rdc_libs/CMakeLists.txt b/projects/rdc/rdc_libs/CMakeLists.txt index b42f4b19e2..aa5e7ed2a2 100755 --- a/projects/rdc/rdc_libs/CMakeLists.txt +++ b/projects/rdc/rdc_libs/CMakeLists.txt @@ -63,6 +63,7 @@ endif() set(SRC_DIR "${PROJECT_SOURCE_DIR}/rdc_libs") set(RDC_LIB_INC_DIR "${PROJECT_SOURCE_DIR}/include") +set(COMMON_DIR "${PROJECT_SOURCE_DIR}/common") ################# Determine the library version ######################### @@ -112,12 +113,13 @@ set(BOOTSTRAP_LIB_COMPONENT "lib${BOOTSTRAP_LIB}") set(BOOTSTRAP_LIB_SRC_LIST "${SRC_DIR}/bootstrap/src/RdcBootStrap.cc") set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLogger.cc") set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLibraryLoader.cc") +set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc") set(BOOTSTRAP_LIB_INC_LIST "${RDC_LIB_INC_DIR}/rdc/rdc.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/rdc_common.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcLogger.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcHandler.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} 
"${RDC_LIB_INC_DIR}/rdc_lib/RdcLibraryLoader.h") - +set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h") message("BOOTSTRAP_LIB_INC_LIST=${BOOTSTRAP_LIB_INC_LIST}") add_library(${BOOTSTRAP_LIB} SHARED ${BOOTSTRAP_LIB_SRC_LIST} ${BOOTSTRAP_LIB_INC_LIST}) @@ -125,6 +127,7 @@ target_link_libraries(${BOOTSTRAP_LIB} pthread dl) target_include_directories(${BOOTSTRAP_LIB} PRIVATE "${PROJECT_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}/include" + "${COMMON_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/include") # TODO: set the properties for the library once we have one @@ -143,6 +146,7 @@ set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcGroupSettingsImp set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcCacheManagerImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcMetricsUpdaterImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcWatchTableImpl.cc") +set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcEmbeddedHandler.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcMetricFetcher.h") @@ -155,6 +159,7 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcMetricsU set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcMetricsUpdaterImpl.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcWatchTable.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcWatchTableImpl.h") +set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h") message("RDC_LIB_INC_LIST=${RDC_LIB_INC_LIST}") diff --git a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc index e078827fa2..b1640e64be 100644 --- a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc +++ 
b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc @@ -22,6 +22,7 @@ THE SOFTWARE. #include #include #include +#include "common/rdc_fields_supported.h" #include "rdc/rdc.h" #include "rdc_lib/RdcHandler.h" #include "rdc_lib/RdcLogger.h" @@ -204,7 +205,7 @@ rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, } rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle, - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { if (!p_rdc_handle || !field_ids || !field_group_name || !rdc_field_group_id) { @@ -270,7 +271,7 @@ rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle, } rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) { if (!p_rdc_handle || !value) { return RDC_ST_INVALID_HANDLER; } @@ -280,7 +281,7 @@ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, } rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, + uint32_t gpu_index, rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!p_rdc_handle || !next_since_time_stamp || !value) { return RDC_ST_INVALID_HANDLER; @@ -350,30 +351,10 @@ const char* rdc_status_string(rdc_status_t result) { } } -const char* field_id_string(uint32_t field_id) { - const std::map id_name = { - {RDC_FI_GPU_MEMORY_USAGE, "GPU_MEMORY_USAGE"}, - {RDC_FI_GPU_MEMORY_TOTAL, "GPU_MEMORY_TOTAL"}, - {RDC_FI_POWER_USAGE, "POWER_USAGE"}, - {RDC_FI_GPU_CLOCK, "GPU_CLOCK"}, - {RDC_FI_GPU_UTIL, "GPU_UTIL"}, - {RDC_FI_GPU_TEMP, "GPU_TEMP"}, - {RDC_FI_GPU_COUNT, "GPU_COUNT"}, - {RDC_FI_MEM_CLOCK, "MEM_CLOCK"}, - {RDC_FI_PCIE_TX, "PCIE_TX"}, - {RDC_FI_PCIE_RX, "PCIE_RX"}, - {RDC_FI_ECC_CORRECT_TOTAL, "ECC_CORRECT"}, - 
{RDC_FI_ECC_UNCORRECT_TOTAL, "ECC_UNCORRECT"}, - {RDC_FI_MEMORY_TEMP, "MEMORY_TEMP"}, - {RDC_FI_DEV_NAME, "DEV_NAME"} - }; - - auto search = id_name.find(field_id); - if (search == id_name.end()) { - return "UNKNOWN_FIELD"; - } - - return search->second; +const char* field_id_string(rdc_field_t field_id) { + amd::rdc::fld_id2name_map_t &field_id_to_descript = + amd::rdc::get_field_id_description_from_id(); + return field_id_to_descript.find(field_id)->second.label.c_str(); } char *strncpy_with_null(char *dest, const char *src, size_t n) { diff --git a/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc index 7ba12e4413..0bfb0ca1bc 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc @@ -32,7 +32,7 @@ namespace amd { namespace rdc { rdc_status_t RdcCacheManagerImpl::rdc_field_get_value_since( - uint32_t gpu_index, uint32_t field_id, uint64_t since_time_stamp, + uint32_t gpu_index, rdc_field_t field_id, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!next_since_time_stamp || !value) { return RDC_ST_BAD_PARAMETER; @@ -72,7 +72,7 @@ rdc_status_t RdcCacheManagerImpl::rdc_field_get_value_since( rdc_status_t RdcCacheManagerImpl::evict_cache(uint32_t gpu_index, - uint32_t field_id, uint64_t max_keep_samples, double max_keep_age) { + rdc_field_t field_id, uint64_t max_keep_samples, double max_keep_age) { std::lock_guard guard(cache_mutex_); RdcFieldKey field{gpu_index, field_id}; @@ -108,7 +108,7 @@ rdc_status_t RdcCacheManagerImpl::evict_cache(uint32_t gpu_index, } rdc_status_t RdcCacheManagerImpl::rdc_field_get_latest_value( - uint32_t gpu_index, uint32_t field_id, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc 
b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc index 43d271f972..2add0edd13 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc @@ -29,6 +29,7 @@ THE SOFTWARE. #include "rdc_lib/rdc_common.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/RdcException.h" +#include "common/rdc_fields_supported.h" #include "rocm_smi/rocm_smi.h" namespace { @@ -259,7 +260,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id, } rdc_status_t RdcEmbeddedHandler::rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { if (!field_group_name || !rdc_field_group_id || !field_ids) { return RDC_ST_BAD_PARAMETER; @@ -268,7 +269,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_field_create(uint32_t num_field_ids, // Check the field is valid or not if (num_field_ids <= RDC_MAX_FIELD_IDS_PER_FIELD_GROUP) { for (uint32_t i = 0; i < num_field_ids; i++) { - if (!metric_fetcher_->is_field_valid(field_ids[i])) { + if (!is_field_valid(field_ids[i])) { RDC_LOG(RDC_INFO, "Fail to create field group with unknown field id " << field_ids[i]); @@ -341,11 +342,11 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_watch(rdc_gpu_group_t group_id, } rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value( - uint32_t gpu_index, uint32_t field, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } - if (!metric_fetcher_->is_field_valid(field)) { + if (!is_field_valid(field)) { RDC_LOG(RDC_INFO, "Fail to get latest value with unknown field id " << field); @@ -355,12 +356,12 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value( } rdc_status_t RdcEmbeddedHandler::rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t 
since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!next_since_time_stamp || !value) { return RDC_ST_BAD_PARAMETER; } - if (!metric_fetcher_->is_field_valid(field)) { + if (!is_field_valid(field)) { RDC_LOG(RDC_INFO, "Fail to get value since with unknown field id " << field); diff --git a/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc index ff7a7f7127..8b2fb102a4 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc @@ -29,7 +29,7 @@ namespace rdc { RdcGroupSettingsImpl::RdcGroupSettingsImpl() { // Add the default job stats fields - uint32_t job_fields[] = {RDC_FI_GPU_MEMORY_USAGE, + rdc_field_t job_fields[] = {RDC_FI_GPU_MEMORY_USAGE, RDC_FI_POWER_USAGE, RDC_FI_GPU_CLOCK, RDC_FI_GPU_UTIL, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, RDC_FI_MEM_CLOCK, RDC_FI_GPU_TEMP}; @@ -37,7 +37,7 @@ RdcGroupSettingsImpl::RdcGroupSettingsImpl() { rdc_field_grp_t fgid = JOB_FIELD_ID; rdc_group_field_create(sizeof(job_fields)/sizeof(uint32_t), - job_fields, job_field_group, &fgid); + job_fields, job_field_group, &fgid); } rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_create( @@ -133,7 +133,7 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_get_all_ids( } rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create( - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { RDC_LOG(RDC_DEBUG, "Create field group " << field_group_name); diff --git a/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc index 269dff471c..99802fa310 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc @@ -26,23 +26,13 @@ THE SOFTWARE. 
#include #include #include "rdc_lib/rdc_common.h" +#include "common/rdc_fields_supported.h" #include "rdc_lib/RdcLogger.h" #include "rocm_smi/rocm_smi.h" namespace amd { namespace rdc { -bool RdcMetricFetcherImpl::is_field_valid(uint32_t field_id) const { - const std::vector all_fields = {RDC_FI_GPU_MEMORY_USAGE, - RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_GPU_COUNT, RDC_FI_POWER_USAGE, - RDC_FI_GPU_CLOCK, RDC_FI_GPU_UTIL, RDC_FI_DEV_NAME, RDC_FI_GPU_TEMP, - RDC_FI_MEM_CLOCK, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, - RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL, RDC_FI_MEMORY_TEMP}; - - return std::find(all_fields.begin(), all_fields.end(), field_id) - != all_fields.end(); -} - RdcMetricFetcherImpl::RdcMetricFetcherImpl() { task_started_ = true; @@ -81,7 +71,7 @@ uint64_t RdcMetricFetcherImpl::now() { } void RdcMetricFetcherImpl::get_ecc_error(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) { + rdc_field_t field_id, rdc_field_value* value) { rsmi_status_t err = RSMI_STATUS_SUCCESS; uint64_t correctable_err = 0; uint64_t uncorrectable_err = 0; @@ -121,7 +111,7 @@ void RdcMetricFetcherImpl::get_ecc_error(uint32_t gpu_index, } bool RdcMetricFetcherImpl::async_get_pcie_throughput(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) { + rdc_field_t field_id, rdc_field_value* value) { if (!value) { return false; } @@ -216,7 +206,7 @@ void RdcMetricFetcherImpl::get_pcie_throughput(const RdcFieldKey& key) { } rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) { + rdc_field_t field_id, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc index d00310d9c2..cf7dd07a28 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc @@ -22,7 +22,7 @@ THE SOFTWARE. 
#include "rdc_lib/impl/RdcMetricsUpdaterImpl.h" #include #include -#include +#include // NOLINT(build/c++11) #include "rdc_lib/rdc_common.h" namespace amd { diff --git a/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc index 2114e7b25a..e70fbece9d 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc @@ -176,7 +176,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) { std::lock_guard guard(watch_mutex_); - RdcFieldKey gkey({group_id, field_group_id}); + RdcFieldGroupKey gkey({group_id, field_group_id}); auto table_iter = watch_table_.find(gkey); // Already in the watch table @@ -234,7 +234,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_watch(rdc_gpu_group_t group_id, } rdc_status_t RdcWatchTableImpl::update_field_in_table_when_unwatch( - const RdcFieldKey& entry) { + const RdcFieldGroupKey& entry) { // Get individual fields for this unwatch std::vector fields; rdc_status_t result = get_fields_from_group( @@ -306,7 +306,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_unwatch( std::lock_guard guard(watch_mutex_); // Set is_watching = false - auto ite = watch_table_.find(RdcFieldKey({group_id, field_group_id})); + auto ite = watch_table_.find(RdcFieldGroupKey({group_id, field_group_id})); if (ite == watch_table_.end()) { return RDC_ST_NOT_FOUND; } @@ -318,7 +318,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_unwatch( } bool RdcWatchTableImpl::is_job_watch_field(uint32_t gpu_index, - uint32_t field_id, std::string& job_id) const { + rdc_field_t field_id, std::string& job_id) const { RdcFieldKey key{gpu_index, field_id}; for (auto ite = job_watch_table_.begin(); diff --git a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc index 4abaaa4318..0938fd1b6e 
100644 --- a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc +++ b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc @@ -290,7 +290,7 @@ rdc_status_t RdcStandaloneHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id, } rdc_status_t RdcStandaloneHandler::rdc_group_field_create( - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { if (!field_ids || !field_group_name || !rdc_field_group_id) { return RDC_ST_BAD_PARAMETER; @@ -339,7 +339,8 @@ rdc_status_t RdcStandaloneHandler::rdc_group_field_get_info( strncpy_with_null(field_group_info->group_name, reply.filed_group_name().c_str(), RDC_MAX_STR_LENGTH); for (int i = 0; i < reply.field_ids_size(); i++) { - field_group_info->field_ids[i] = reply.field_ids(i); + field_group_info->field_ids[i] = + static_cast(reply.field_ids(i)); } return RDC_ST_OK; @@ -471,7 +472,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_watch(rdc_gpu_group_t group_id, } rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value( - uint32_t gpu_index, uint32_t field, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } @@ -487,7 +488,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value( rdc_status_t err_status = error_handle(status, reply.status()); if (err_status != RDC_ST_OK) return err_status; - value->field_id = reply.field_id(); + value->field_id = static_cast(reply.field_id()); value->status = reply.rdc_status(); value->ts = reply.ts(); value->type = static_cast(reply.type()); @@ -504,7 +505,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value( } rdc_status_t RdcStandaloneHandler::rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if 
(!next_since_time_stamp || !value) { return RDC_ST_BAD_PARAMETER; @@ -522,7 +523,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_value_since(uint32_t gpu_index, rdc_status_t err_status = error_handle(status, reply.status()); if (err_status != RDC_ST_OK) return err_status; - value->field_id = reply.field_id(); + value->field_id = static_cast(reply.field_id()); value->status = reply.rdc_status(); value->ts = reply.ts(); value->type = static_cast(reply.type()); diff --git a/projects/rdc/rdci/CMakeLists.txt b/projects/rdc/rdci/CMakeLists.txt index 9ab8303ee9..4cf8543713 100644 --- a/projects/rdc/rdci/CMakeLists.txt +++ b/projects/rdc/rdci/CMakeLists.txt @@ -80,6 +80,8 @@ set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciFieldGroupSubSystem.cc") set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciDmonSubSystem.cc") set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciStatsSubSystem.cc") set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${PROJECT_SOURCE_DIR}/common/rdc_utils.cc") +set(RDCI_SRC_LIST ${RDCI_SRC_LIST} + "${PROJECT_SOURCE_DIR}/common/rdc_fields_supported.cc") message("RDCI_SRC_LIST=${RDCI_SRC_LIST}") set(RDCI_EXE "rdci") diff --git a/projects/rdc/rdci/include/RdciDmonSubSystem.h b/projects/rdc/rdci/include/RdciDmonSubSystem.h index ae3bb296e3..55169cc666 100644 --- a/projects/rdc/rdci/include/RdciDmonSubSystem.h +++ b/projects/rdc/rdci/include/RdciDmonSubSystem.h @@ -61,7 +61,7 @@ class RdciDmonSubSystem: public RdciSubSystem { }; std::map options_; - std::vector field_ids_; + std::vector field_ids_; std::vector gpu_indexes_; bool need_cleanup_; diff --git a/projects/rdc/rdci/include/RdciSubSystem.h b/projects/rdc/rdci/include/RdciSubSystem.h index 86a0b793dd..df8d65ff20 100644 --- a/projects/rdc/rdci/include/RdciSubSystem.h +++ b/projects/rdc/rdci/include/RdciSubSystem.h @@ -47,8 +47,6 @@ class RdciSubSystem { std::vector split_string(const std::string& s, char delimiter) const; void show_common_usage() const; - bool get_field_id_from_name(const std::string& name, - 
uint32_t& value) const; // NOLINT(runtime/references) rdc_handle_t rdc_handle_; std::string ip_port_; diff --git a/projects/rdc/rdci/src/RdciDmonSubSystem.cc b/projects/rdc/rdci/src/RdciDmonSubSystem.cc index 6c07ac4061..f4fe1fd394 100644 --- a/projects/rdc/rdci/src/RdciDmonSubSystem.cc +++ b/projects/rdc/rdci/src/RdciDmonSubSystem.cc @@ -25,8 +25,13 @@ THE SOFTWARE. #include #include #include +#include +#include +#include + #include "rdc_lib/rdc_common.h" #include "common/rdc_utils.h" +#include "common/rdc_fields_supported.h" #include "rdc/rdc.h" #include "rdc_lib/RdcException.h" @@ -147,14 +152,16 @@ void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) { std::vector vec_ids = split_string(field_ids, ','); for (uint32_t i = 0; i < vec_ids.size(); i++) { if (!IsNumber(vec_ids[i])) { - uint32_t field_id = 0; - if (!get_field_id_from_name(vec_ids[i], field_id)) { + rdc_field_t field_id = RDC_FI_INVALID; + if (!amd::rdc::get_field_id_from_name(vec_ids[i], + &field_id)) { throw RdcException(RDC_ST_BAD_PARAMETER, "The field name "+vec_ids[i]+" is not valid"); } field_ids_.push_back(field_id); } else { - field_ids_.push_back(std::stoi(vec_ids[i])); + field_ids_.push_back(static_cast( + std::stoi(vec_ids[i]))); } } } @@ -265,7 +272,7 @@ void RdciDmonSubSystem::create_temp_field_group() { const std::string field_group_name("rdci-dmon-field-group"); rdc_field_grp_t group_id; - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; for (uint32_t i = 0; i < field_ids_.size(); i++) { field_ids[i] = field_ids_[i]; } @@ -280,24 +287,22 @@ void RdciDmonSubSystem::create_temp_field_group() { options_.insert({OPTIONS_FIELD_GROUP_ID, group_id}); } + void RdciDmonSubSystem::show_field_usage() const { - std::cout << "Supported fields Ids:\n"; - std::cout << "100 RDC_FI_GPU_CLOCK: Current GPU clock frequencies.\n"; - std::cout << "101 RDC_FI_MEM_CLOCK: Current Memory clock frequencies.\n"; - std::cout << "140 
RDC_FI_MEMORY_TEMP: Memory " - << "temperature in millidegrees Celsius.\n"; - std::cout << "150 RDC_FI_GPU_TEMP: GPU " - << "temperature in millidegrees Celsius.\n"; - std::cout << "155 RDC_FI_POWER_USAGE: Power usage in microwatts.\n"; - std::cout << "200 RDC_FI_PCIE_TX: PCIe Tx utilization in bytes/second.\n"; - std::cout << "201 RDC_FI_PCIE_RX: PCIe Rx utilization in bytes/second.\n"; - std::cout << "203 RDC_FI_GPU_UTIL: GPU busy percentage.\n"; - std::cout << "312 RDC_FI_ECC_CORRECT_TOTAL: Accumulated " - << "correctable ECC errors.\n"; - std::cout << "313 RDC_FI_ECC_UNCORRECT_TOTAL: Accumulated " - << "uncorrectable ECC errors.\n"; - std::cout << "525 RDC_FI_GPU_MEMORY_USAGE: Memory usage of the GPU " - << "instance in bytes.\n"; + std::cout << "Supported fields Ids:" << std::endl; + + amd::rdc::fld_id2name_map_t &field_id_to_descript = + amd::rdc::get_field_id_description_from_id(); + for (auto i = field_id_to_descript.begin(); + i != field_id_to_descript.end(); i++) { + std::cout << i->first << " " << i->second.enum_name << " : " << + i->second.description << "." << std::endl; + } + std::cout << std::endl; + std::cout << "* Note: The field ID number associated with a field ID can " + "change" << std::endl; + std::cout << " from release to release. Field name strings should be " + "used in scripts." << std::endl; } void RdciDmonSubSystem::process() { diff --git a/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc b/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc index 6f9b8268af..a4a0a1295a 100644 --- a/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc +++ b/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc @@ -24,6 +24,7 @@ THE SOFTWARE. 
#include #include "rdc_lib/rdc_common.h" #include "common/rdc_utils.h" +#include "common/rdc_fields_supported.h" #include "rdc/rdc.h" #include "rdc_lib/RdcException.h" @@ -165,15 +166,16 @@ void RdciFieldGroupSubSystem::process() { "Must specify the group name when create a field group"); } std::vector fields = split_string(field_ids_, ','); - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; for (uint32_t i = 0; i < fields.size(); i++) { if (!IsNumber(fields[i])) { - if (!get_field_id_from_name(fields[i], field_ids[i])) { + if (!get_field_id_from_name(fields[i], &field_ids[i])) { throw RdcException(RDC_ST_BAD_PARAMETER, "The field name "+fields[i]+" is not valid"); } } else { - field_ids[i] = std::stoi(fields[i]); + field_ids[i] = + static_cast(std::stoi(fields[i])); } } rdc_field_grp_t group_id; diff --git a/projects/rdc/rdci/src/RdciSubSystem.cc b/projects/rdc/rdci/src/RdciSubSystem.cc index 7d98525865..0d4b03fb3a 100644 --- a/projects/rdc/rdci/src/RdciSubSystem.cc +++ b/projects/rdc/rdci/src/RdciSubSystem.cc @@ -23,6 +23,7 @@ THE SOFTWARE. 
#include #include "rdc_lib/RdcException.h" #include "common/rdc_utils.h" +#include "common/rdc_fields_supported.h" namespace amd { namespace rdc { @@ -45,34 +46,6 @@ bool RdciSubSystem::is_json_output() const { return is_json_output_; } -bool RdciSubSystem::get_field_id_from_name( - const std::string& name, uint32_t& value) const { - const std::map field_name_to_id = { - {"RDC_FI_GPU_MEMORY_USAGE", 525}, - {"RDC_FI_GPU_MEMORY_TOTAL", 580}, - {"RDC_FI_POWER_USAGE", 155}, - {"RDC_FI_GPU_CLOCK", 100}, - {"RDC_FI_MEM_CLOCK", 101}, - {"RDC_FI_PCIE_TX", 200}, - {"RDC_FI_PCIE_RX", 201}, - {"RDC_FI_GPU_UTIL", 203}, - {"RDC_FI_ECC_CORRECT_TOTAL", 312}, - {"RDC_FI_ECC_UNCORRECT_TOTAL", 313}, - {"RDC_FI_MEMORY_TEMP", 140}, - {"RDC_FI_GPU_TEMP", 150}, - {"RDC_FI_GPU_COUNT", 4}, - {"RDC_FI_DEV_NAME", 50} - }; - - auto id = field_name_to_id.find(name); - if (id == field_name_to_id.end()) { - return false; - } - - value = id->second; - return true; -} - std::vector RdciSubSystem::split_string(const std::string& s, char delimiter) const { std::vector tokens; diff --git a/projects/rdc/server/src/rdc_api_service.cc b/projects/rdc/server/src/rdc_api_service.cc index dd7e99bdcc..d798ff0199 100755 --- a/projects/rdc/server/src/rdc_api_service.cc +++ b/projects/rdc/server/src/rdc_api_service.cc @@ -245,9 +245,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { } rdc_field_grp_t field_group_id; - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; for (int i = 0; i < request->field_ids_size(); i++) { - field_ids[i] = request->field_ids(i); + field_ids[i] = static_cast(request->field_ids(i)); } rdc_status_t result = rdc_group_field_create( rdc_handle_, request->field_ids_size() , &field_ids[0], @@ -331,8 +331,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { } rdc_field_value value; - rdc_status_t result = rdc_field_get_latest_value( - rdc_handle_, request->gpu_index(), request->field_id(), &value); + rdc_status_t result = 
rdc_field_get_latest_value(rdc_handle_, + request->gpu_index(), static_cast(request->field_id()), + &value); reply->set_status(result); if (result != RDC_ST_OK) { return ::grpc::Status::OK; @@ -365,9 +366,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { rdc_field_value value; uint64_t next_timestamp; - rdc_status_t result = rdc_field_get_value_since( - rdc_handle_, request->gpu_index(), request->field_id(), - request->since_time_stamp(), &next_timestamp, &value); + rdc_status_t result = rdc_field_get_value_since(rdc_handle_, + request->gpu_index(), static_cast(request->field_id()), + request->since_time_stamp(), &next_timestamp, &value); reply->set_status(result); if (result != RDC_ST_OK) { return ::grpc::Status::OK; diff --git a/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc b/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc index 4bdff76ec3..e255d4c3f6 100644 --- a/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc +++ b/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc @@ -33,7 +33,8 @@ THE SOFTWARE. 
TestRdciDiscovery::TestRdciDiscovery() : TestBase() { set_title("\tRDC Discovery Test"); - set_description("\tThe Discovery tests verifies that the GPUs are discovered and identified ."); + set_description("\tThe Discovery tests verifies that the GPUs are " + "discovered and identified ."); } TestRdciDiscovery::~TestRdciDiscovery(void) { @@ -58,17 +59,16 @@ void TestRdciDiscovery::DisplayResults(void) const { void TestRdciDiscovery::Close() { TestBase::Close(); rdc_status_t result; - if(standalone_){ + if (standalone_) { IF_VERB(STANDARD) { - std::cout << "\t**Disconnecting from host....\n" << std::endl; - } + std::cout << "\t**Disconnecting from host....\n" << std::endl; + } result = rdc_disconnect(rdc_handle); ASSERT_EQ(result, RDC_ST_OK); - } - else{ + } else { IF_VERB(STANDARD) { std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl; - } + } result = rdc_stop_embedded(rdc_handle); ASSERT_EQ(result, RDC_ST_OK); } @@ -77,20 +77,17 @@ void TestRdciDiscovery::Close() { ASSERT_EQ(result, RDC_ST_OK); } - void TestRdciDiscovery::Run(void) { - TestBase::Run(); rdc_status_t result; - if(standalone_){ + if (standalone_) { IF_VERB(STANDARD) { std::cout << "\t**Connecting to host....\n" << std::endl; } char hostIpAddress[] = {"localhost:50051"}; result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr); ASSERT_EQ(result, RDC_ST_OK); - } - else{ + } else { IF_VERB(STANDARD) { std::cout << "\t**Starting embedded RDC engine....\n" << std::endl; } @@ -121,17 +118,17 @@ void TestRdciDiscovery::Run(void) { rdc_device_attributes_t attribute; for (uint32_t i = 0; i < count; i++) { - result = rdc_device_get_attributes(0, gpu_index_list[i], &attribute); ASSERT_EQ(result, RDC_ST_INVALID_HANDLER); result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i], 0); ASSERT_EQ(result, RDC_ST_INVALID_HANDLER); - result = rdc_device_get_attributes(rdc_handle,gpu_index_list[i], &attribute); + result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i], 
+ &attribute); ASSERT_EQ(result, RDC_ST_OK); - std::cout << "\tGPU ID "<< i << " || " << attribute.device_name <