diff --git a/projects/rdc/common/rdc_field_data.data b/projects/rdc/common/rdc_field_data.data new file mode 100644 index 0000000000..3b27d56b6d --- /dev/null +++ b/projects/rdc/common/rdc_field_data.data @@ -0,0 +1,39 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +FLD_DESC_ENT(RDC_FI_INVALID, "Unknown/Invalid field", "INVALID") +FLD_DESC_ENT(RDC_FI_GPU_COUNT, "GPU count in the system", "GPU_COUNT") +FLD_DESC_ENT(RDC_FI_DEV_NAME, "Name of the device", "DEV_NAME") +FLD_DESC_ENT(RDC_FI_GPU_CLOCK, "Current GPU clock frequencies.", "GPU_CLOCK") +FLD_DESC_ENT(RDC_FI_MEM_CLOCK, "Current Memory clock frequencies.", "MEM_CLOCK") +FLD_DESC_ENT(RDC_FI_MEMORY_TEMP, "Memory temperature in millidegrees Celsius.", "MEMORY_TEMP") +FLD_DESC_ENT(RDC_FI_GPU_TEMP, "GPU temperature in millidegrees Celsius.", "GPU_TEMP") +FLD_DESC_ENT(RDC_FI_POWER_USAGE, "Power usage in microwatts.", "POWER_USAGE") +FLD_DESC_ENT(RDC_FI_PCIE_TX, "PCIe Tx utilization in bytes/second.", "PCIE_TX") +FLD_DESC_ENT(RDC_FI_PCIE_RX, "PCIe Rx utilization in bytes/second.", "PCIE_RX") +FLD_DESC_ENT(RDC_FI_GPU_UTIL, "GPU busy percentage.", "GPU_UTIL") +FLD_DESC_ENT(RDC_FI_GPU_MEMORY_USAGE, + "Memory usage of the GPU instance in bytes.", "GPU_MEMORY_USAGE") +FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", "GPU_MEMORY_TOTAL") +FLD_DESC_ENT(RDC_FI_ECC_CORRECT_TOTAL, "Accumulated correctable ECC errors.", "ECC_CORRECT") +FLD_DESC_ENT(RDC_FI_ECC_UNCORRECT_TOTAL, + "Accumulated uncorrectable ECC errors.", "ECC_UNCORRECT") + diff --git a/projects/rdc/common/rdc_fields_supported.cc b/projects/rdc/common/rdc_fields_supported.cc new file mode 100644 index 0000000000..83c676d852 --- /dev/null +++ b/projects/rdc/common/rdc_fields_supported.cc @@ -0,0 +1,72 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#include + +#include + +#include "common/rdc_fields_supported.h" +#include "rdc/rdc.h" +namespace amd { +namespace rdc { + +#define FLD_DESC_ENT(ID, DESC, LABEL) \ + {static_cast(ID), {#ID, (DESC), (LABEL)}}, +static const fld_id2name_map_t field_id_to_descript = { + #include "common/rdc_field_data.data" +}; +#undef FLD_DESC_ENT + +#define FLD_DESC_ENT(ID, DESC, LABEL) {#ID, (ID)}, +static fld_name2id_map_t field_name_to_id = { + #include "common/rdc_field_data.data" // NOLINT +}; +#undef FLD_DESC_ENT + + + +amd::rdc::fld_id2name_map_t & +get_field_id_description_from_id(void) { + return field_id_to_descript; +} + +bool get_field_id_from_name(const std::string name, rdc_field_t *value) { + assert(value != nullptr); + auto id = field_name_to_id.find(name); + if (id == field_name_to_id.end()) { + return false; + } + + *value = static_cast(id->second); + return true; +} + +bool is_field_valid(rdc_field_t field_id) { + if (field_id == RDC_FI_INVALID) { + return false; + } + return field_id_to_descript.find(static_cast(field_id)) != + field_id_to_descript.end(); +} + + +} // namespace rdc +} // namespace amd diff --git a/projects/rdc/common/rdc_fields_supported.h b/projects/rdc/common/rdc_fields_supported.h new file mode 100644 index 0000000000..02c0ebbd09 --- /dev/null +++ b/projects/rdc/common/rdc_fields_supported.h @@ -0,0 +1,50 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#ifndef COMMON_RDC_FIELDS_SUPPORTED_H_ +#define COMMON_RDC_FIELDS_SUPPORTED_H_ + +#include +#include + +#include "rdc/rdc.h" + +namespace amd { +namespace rdc { + +typedef struct { + std::string enum_name; + std::string description; + std::string label; +} field_id_descript; + +typedef const std::unordered_map + fld_id2name_map_t; +typedef std::unordered_map fld_name2id_map_t; + +bool get_field_id_from_name(const std::string name, rdc_field_t *value); +fld_id2name_map_t & get_field_id_description_from_id(void); // NOLINT +bool is_field_valid(rdc_field_t field_id); + +} // namespace rdc +} // namespace amd + +#endif // COMMON_RDC_FIELDS_SUPPORTED_H_ diff --git a/projects/rdc/example/field_value_example.cc b/projects/rdc/example/field_value_example.cc index b742c6f958..8f7a1b6ae6 100644 --- a/projects/rdc/example/field_value_example.cc +++ b/projects/rdc/example/field_value_example.cc @@ -129,7 +129,7 @@ int main(int, char **) { // Create the field groups to monitor POWER and TEMP rdc_field_grp_t field_group_id; - uint32_t field_ids[2]; + rdc_field_t field_ids[2]; field_ids[0] = RDC_FI_GPU_MEMORY_USAGE; field_ids[1] = RDC_FI_POWER_USAGE; result = rdc_group_field_create(rdc_handle, 2, diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index ff5a2e5cf8..0491b34604 100755 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -128,77 +128,46 @@ typedef enum { #define RDC_MAX_NUM_FIELD_GROUPS 64 /** - * Memory usage of the GPU instance + * These enums are used to specify a particular field to be retrieved. 
*/ -#define RDC_FI_GPU_MEMORY_USAGE 525 +typedef enum { + RDC_FI_INVALID = 0, //!< Invalid field value + //!< @brief Identifier fields + RDC_FI_GPU_COUNT = 1, //!< GPU count in the system + RDC_FI_DEV_NAME, //!< Name of the device -/** - * Total memory of the GPU instance - */ -#define RDC_FI_GPU_MEMORY_TOTAL 580 + /* + * @brief Frequency related fields + */ + RDC_FI_GPU_CLOCK = 100, //!< The current clock for the GPU + RDC_FI_MEM_CLOCK, //!< Clock for the memory -/** - * Power usage for the device - */ -#define RDC_FI_POWER_USAGE 155 + /* + * @brief Physical monitor fields + */ + RDC_FI_MEMORY_TEMP = 200, //!< Memory temperature for the device + RDC_FI_GPU_TEMP, //!< Current temperature for the device + RDC_FI_POWER_USAGE = 300, //!< Power usage for the device -/** - * The current clock for the GPU - */ -#define RDC_FI_GPU_CLOCK 100 + /* + * @brief PCIe related fields + */ + RDC_FI_PCIE_TX = 400, //!< PCIe Tx utilization information + RDC_FI_PCIE_RX, //!< PCIe Rx utilization information -/** - * Clock for the memory - */ -#define RDC_FI_MEM_CLOCK 101 - -/** - * PCIe Tx utilization information - */ -#define RDC_FI_PCIE_TX 200 - -/** - * PCIe Rx utilization information - */ -#define RDC_FI_PCIE_RX 201 - - -/** - * GPU Utilization - */ -#define RDC_FI_GPU_UTIL 203 - -/** - * Accumulated correctable ECC errors - */ -#define RDC_FI_ECC_CORRECT_TOTAL 312 - -/** - * Accumulated uncorrectable ECC errors - */ -#define RDC_FI_ECC_UNCORRECT_TOTAL 313 - -/** - * Memory temperature for the device - */ -#define RDC_FI_MEMORY_TEMP 140 - -/** - * Current temperature for the device - */ -#define RDC_FI_GPU_TEMP 150 - - -/** - * GPU count in the system - */ -#define RDC_FI_GPU_COUNT 4 - -/** - * Name of the device - */ -#define RDC_FI_DEV_NAME 50 + /* + * @brief GPU usage related fields + */ + RDC_FI_GPU_UTIL = 500, //!< GPU Utilization + RDC_FI_GPU_MEMORY_USAGE, //!< Memory usage of the GPU instance + RDC_FI_GPU_MEMORY_TOTAL, //!< Total memory of the GPU instance + /** + * @brief 
ECC related fields + */ + RDC_FI_ECC_CORRECT_TOTAL = 600, //!< Accumulated correctable ECC errors + RDC_FI_ECC_UNCORRECT_TOTAL, //!< Accumulated uncorrectable ECC errors +} rdc_field_t; /** * @brief handlers used in various rdc calls @@ -273,7 +242,7 @@ typedef struct { * @brief The structure to store the field value */ typedef struct { - uint32_t field_id; //!< The field id of the value + rdc_field_t field_id; //!< The field id of the value int status; //!< RDC_ST_OK or error status uint64_t ts; //!< Timestamp in usec since 1970 rdc_field_type_t type; //!< The field type @@ -294,7 +263,7 @@ typedef struct { /** * The list of fields in the group */ - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; } rdc_field_group_info_t; /** @@ -647,7 +616,7 @@ rdc_status_t rdc_group_gpu_destroy(rdc_handle_t p_rdc_handle, * @retval ::RDC_ST_OK is returned upon successful call. */ rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle, - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id); /** @@ -743,7 +712,7 @@ rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle, * @retval ::RDC_ST_OK is returned upon successful call. */ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, rdc_field_value* value); + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value); /** * @brief Request a history cached field of a GPU @@ -767,7 +736,7 @@ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, * @retval ::RDC_ST_OK is returned upon successful call. 
*/ rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, + uint32_t gpu_index, rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value); /** @@ -806,7 +775,7 @@ const char* rdc_status_string(rdc_status_t status); * * @retval The string to describe the field. */ -const char* field_id_string(uint32_t field_id); +const char* field_id_string(rdc_field_t field_id); #ifdef __cplusplus } diff --git a/projects/rdc/include/rdc_lib/RdcCacheManager.h b/projects/rdc/include/rdc_lib/RdcCacheManager.h index 0052639d26..1cedc06c1b 100644 --- a/projects/rdc/include/rdc_lib/RdcCacheManager.h +++ b/projects/rdc/include/rdc_lib/RdcCacheManager.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_RDCCACHEMANAGER_H_ -#define RDC_LIB_RDCCACHEMANAGER_H_ +#ifndef INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_ +#define INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_ #include #include @@ -36,13 +36,13 @@ namespace rdc { class RdcCacheManager { public: virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) = 0; + rdc_field_t field, rdc_field_value* value) = 0; virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) = 0; virtual rdc_status_t rdc_update_cache(uint32_t gpu_index, const rdc_field_value& value) = 0; - virtual rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id, + virtual rdc_status_t evict_cache(uint32_t gpu_index, rdc_field_t field_id, uint64_t max_keep_samples, double max_keep_age) = 0; virtual std::string get_cache_stats() = 0; @@ -69,4 +69,4 @@ typedef std::shared_ptr RdcCacheManagerPtr; } // namespace rdc 
} // namespace amd -#endif // RDC_LIB_RDCCACHEMANAGER_H_ +#endif // INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcException.h b/projects/rdc/include/rdc_lib/RdcException.h index 62225c12ef..9c6b320cfc 100644 --- a/projects/rdc/include/rdc_lib/RdcException.h +++ b/projects/rdc/include/rdc_lib/RdcException.h @@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDCI_INCLUDE_RDCEXCEPTION_H_ -#define RDCI_INCLUDE_RDCEXCEPTION_H_ +#ifndef INCLUDE_RDC_LIB_RDCEXCEPTION_H_ +#define INCLUDE_RDC_LIB_RDCEXCEPTION_H_ #include #include @@ -45,5 +45,5 @@ class RdcException : public std::exception { } // namespace rdc } // namespace amd -#endif // RDCI_INCLUDE_RDCEXCEPTION_H_ +#endif // INCLUDE_RDC_LIB_RDCEXCEPTION_H_ diff --git a/projects/rdc/include/rdc_lib/RdcGroupSettings.h b/projects/rdc/include/rdc_lib/RdcGroupSettings.h index 89d6f4ed22..4affe16b9e 100644 --- a/projects/rdc/include/rdc_lib/RdcGroupSettings.h +++ b/projects/rdc/include/rdc_lib/RdcGroupSettings.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCGROUPSETTINGS_H_ -#define RDC_LIB_RDCGROUPSETTINGS_H_ +#ifndef INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_ +#define INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_ #include #include "rdc_lib/rdc_common.h" @@ -45,7 +45,7 @@ class RdcGroupSettings { virtual rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) = 0; virtual rdc_status_t rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) = 0; @@ -64,4 +64,4 @@ const uint32_t JOB_FIELD_ID = 0; } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCGROUPSETTINGS_H_ +#endif // INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_ diff --git a/projects/rdc/include/rdc_lib/RdcHandler.h b/projects/rdc/include/rdc_lib/RdcHandler.h index a89a9a8e89..08a570b7f1 100644 --- a/projects/rdc/include/rdc_lib/RdcHandler.h +++ b/projects/rdc/include/rdc_lib/RdcHandler.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCHANDLER_H_ -#define RDC_LIB_RDCHANDLER_H_ +#ifndef INCLUDE_RDC_LIB_RDCHANDLER_H_ +#define INCLUDE_RDC_LIB_RDCHANDLER_H_ #include "rdc_lib/rdc_common.h" #include "rdc/rdc.h" @@ -52,7 +52,7 @@ class RdcHandler { virtual rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId, uint32_t gpu_index) = 0; virtual rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) = 0; virtual rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, @@ -73,9 +73,9 @@ class RdcHandler { rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) = 0; virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) = 0; + rdc_field_t field, rdc_field_value* value) = 0; virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) = 0; virtual rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) = 0; @@ -89,4 +89,4 @@ class RdcHandler { } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCHANDLER_H_ +#endif // INCLUDE_RDC_LIB_RDCHANDLER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcLibraryLoader.h b/projects/rdc/include/rdc_lib/RdcLibraryLoader.h index 7a3d7fe2be..2b3c1a4352 100644 --- a/projects/rdc/include/rdc_lib/RdcLibraryLoader.h +++ b/projects/rdc/include/rdc_lib/RdcLibraryLoader.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCLIBRARYLOADER_H_ -#define RDC_LIB_RDCLIBRARYLOADER_H_ +#ifndef INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_ +#define INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_ #include #include #include @@ -83,4 +83,4 @@ template rdc_status_t RdcLibraryLoader::load(const char* filename, } // namespace amd -#endif // RDC_LIB_RDCLIBRARYLOADER_H_ +#endif // INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcLogger.h b/projects/rdc/include/rdc_lib/RdcLogger.h index b2d891b73e..ead06106be 100644 --- a/projects/rdc/include/rdc_lib/RdcLogger.h +++ b/projects/rdc/include/rdc_lib/RdcLogger.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_RDCLOGGER_H_ -#define RDC_LIB_RDCLOGGER_H_ +#ifndef INCLUDE_RDC_LIB_RDCLOGGER_H_ +#define INCLUDE_RDC_LIB_RDCLOGGER_H_ #include #include #include // NOLINT @@ -56,4 +56,4 @@ class RdcLogger { } // namespace amd -#endif // RDC_LIB_RDCLOGGER_H_ +#endif // INCLUDE_RDC_LIB_RDCLOGGER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcMetricFetcher.h b/projects/rdc/include/rdc_lib/RdcMetricFetcher.h index c4c0cd6122..81ef756832 100644 --- a/projects/rdc/include/rdc_lib/RdcMetricFetcher.h +++ b/projects/rdc/include/rdc_lib/RdcMetricFetcher.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCMETRICFETCHER_H_ -#define RDC_LIB_RDCMETRICFETCHER_H_ +#ifndef INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_ +#define INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_ #include #include "rdc_lib/rdc_common.h" @@ -33,8 +33,7 @@ namespace rdc { class RdcMetricFetcher { public: virtual rdc_status_t fetch_smi_field(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) = 0; - virtual bool is_field_valid(uint32_t field_id) const = 0; + rdc_field_t field_id, rdc_field_value* value) = 0; virtual ~RdcMetricFetcher() {} }; @@ -43,4 +42,4 @@ typedef std::shared_ptr RdcMetricFetcherPtr; } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCMETRICFETCHER_H_ +#endif // INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h b/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h index d724c483e5..681855aff6 100644 --- a/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h +++ b/projects/rdc/include/rdc_lib/RdcMetricsUpdater.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_RDCMETRICSUPDATER_H_ -#define RDC_LIB_RDCMETRICSUPDATER_H_ +#ifndef INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ +#define INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ #include @@ -39,4 +39,4 @@ typedef std::shared_ptr RdcMetricsUpdaterPtr; } // namespace amd -#endif // RDC_LIB_RDCMETRICSUPDATER_H_ +#endif // INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ diff --git a/projects/rdc/include/rdc_lib/RdcWatchTable.h b/projects/rdc/include/rdc_lib/RdcWatchTable.h index 1597f73011..ecb6f0823f 100644 --- a/projects/rdc/include/rdc_lib/RdcWatchTable.h +++ b/projects/rdc/include/rdc_lib/RdcWatchTable.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDCWATCHTABLE_H_ -#define RDC_LIB_RDCWATCHTABLE_H_ +#ifndef INCLUDE_RDC_LIB_RDCWATCHTABLE_H_ +#define INCLUDE_RDC_LIB_RDCWATCHTABLE_H_ #include #include @@ -57,4 +57,4 @@ typedef std::shared_ptr RdcWatchTablePtr; } // namespace rdc } // namespace amd -#endif // RDC_LIB_RDCWATCHTABLE_H_ +#endif // INCLUDE_RDC_LIB_RDCWATCHTABLE_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h b/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h index 6e928fb4a8..9ef38b9502 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ -#define RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ #include #include // NOLINT(build/c++11) @@ -80,13 +80,13 @@ typedef std::map RdcJobStatsCache; class RdcCacheManagerImpl: public RdcCacheManager { public: rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) override; - rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, - uint64_t since_time_stamp, uint64_t *next_since_time_stamp, - rdc_field_value* value) override; + rdc_field_t field, rdc_field_value* value) override; + rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, + rdc_field_t field, uint64_t since_time_stamp, + uint64_t *next_since_time_stamp, rdc_field_value* value) override; rdc_status_t rdc_update_cache(uint32_t gpu_index, const rdc_field_value& value) override; - rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id, + rdc_status_t evict_cache(uint32_t gpu_index, rdc_field_t field_id, uint64_t max_keep_samples, double max_keep_age) override; std::string 
get_cache_stats() override; @@ -120,4 +120,4 @@ class RdcCacheManagerImpl: public RdcCacheManager { } // namespace amd -#endif // RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h index 2b09664c15..0541be031f 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h +++ b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ -#define RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ #include // NOLINT(build/c++11) #include "rdc_lib/RdcHandler.h" @@ -57,7 +57,7 @@ class RdcEmbeddedHandler: public RdcHandler { rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId, uint32_t gpu_index) override; rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) override; rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, @@ -78,9 +78,9 @@ class RdcEmbeddedHandler: public RdcHandler { rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) override; + rdc_field_t field, rdc_field_value* value) override; rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) override; rdc_status_t rdc_field_unwatch(rdc_gpu_group_t 
group_id, rdc_field_grp_t field_group_id) override; @@ -108,4 +108,4 @@ extern "C" { amd::rdc::RdcHandler *make_handler(rdc_operation_mode_t op_mode); } -#endif // RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h b/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h index d616df5762..c57fe715dc 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcGroupSettingsImpl.h @@ -19,9 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ -#define RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ - +#ifndef INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ #include #include @@ -46,7 +45,7 @@ class RdcGroupSettingsImpl: public RdcGroupSettings { rdc_gpu_group_t group_id_list[], uint32_t* count) override; rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) override; rdc_status_t rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) override; @@ -71,4 +70,4 @@ class RdcGroupSettingsImpl: public RdcGroupSettings { } // namespace amd -#endif // RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h b/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h index 154e466f11..4267acd845 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcMetricFetcherImpl.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ -#define RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ #include // NOLINT(build/c++11) #include // NOLINT(build/c++11) @@ -52,18 +52,17 @@ struct MetricTask { class RdcMetricFetcherImpl: public RdcMetricFetcher { public: rdc_status_t fetch_smi_field(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) override; - bool is_field_valid(uint32_t field_id) const override; + rdc_field_t field_id, rdc_field_value* value) override; RdcMetricFetcherImpl(); ~RdcMetricFetcherImpl(); private: uint64_t now(); void get_ecc_error(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value); + rdc_field_t field_id, rdc_field_value* value); //!< return true if starting async_get bool async_get_pcie_throughput(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value); + rdc_field_t field_id, rdc_field_value* value); void get_pcie_throughput(const RdcFieldKey& key); //!< Async metric retreive @@ -78,4 +77,4 @@ class RdcMetricFetcherImpl: public RdcMetricFetcher { } // namespace rdc } // namespace amd -#endif // RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h b/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h index 8612300db6..72fd9d1661 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcMetricsUpdaterImpl.h @@ -19,10 +19,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ -#define RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ -#include +#include // NOLINT(build/c++11) #include #include "rdc_lib/RdcMetricsUpdater.h" #include "rdc_lib/RdcWatchTable.h" @@ -46,4 +46,4 @@ class RdcMetricsUpdaterImpl: public RdcMetricsUpdater { } // namespace rdc } // namespace amd -#endif // RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h index 30267ee9ac..439bbbe128 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h +++ b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ -#define RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ #include #include #include "rdc.grpc.pb.h" // NOLINT @@ -53,7 +53,7 @@ class RdcStandaloneHandler: public RdcHandler { rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId, uint32_t gpu_index) override; rdc_status_t rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) override; rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, @@ -74,9 +74,9 @@ class RdcStandaloneHandler: public RdcHandler { rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, - uint32_t field, rdc_field_value* value) override; + rdc_field_t field, rdc_field_value* value) override; rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) override; rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) override; @@ -107,4 +107,4 @@ extern "C" { const char* root_ca, const char* client_cert, const char* client_key); } -#endif // RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_ diff --git a/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h b/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h index 894860f562..134cec02fd 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcWatchTableImpl.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ -#define RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ +#ifndef INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ +#define INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ #include #include @@ -86,7 +86,8 @@ class RdcWatchTableImpl : public RdcWatchTable { private: //!< Helper function to Update the fields_in_table when unwatch tables - rdc_status_t update_field_in_table_when_unwatch(const RdcFieldKey& entry); + rdc_status_t update_field_in_table_when_unwatch( + const RdcFieldGroupKey& entry); //!< Helper function to clean up the watch table and cache void clean_up(); @@ -99,15 +100,15 @@ class RdcWatchTableImpl : public RdcWatchTable { rdc_field_grp_t field_group_id, std::vector & fields); // NOLINT - bool is_job_watch_field(uint32_t gpu_index, uint32_t field_id, - std::string& job_id) const; // NOLINT + bool is_job_watch_field(uint32_t gpu_index, rdc_field_t field_id, + std::string& job_id) const; // NOLINT RdcGroupSettingsPtr group_settings_; RdcCacheManagerPtr cache_mgr_; RdcMetricFetcherPtr metric_fetcher_; //!< The watch table to store the watch settings. - std::map watch_table_; + std::map watch_table_; //!< pairs std::map job_watch_table_; @@ -129,4 +130,4 @@ class RdcWatchTableImpl : public RdcWatchTable { } // namespace amd -#endif // RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ +#endif // INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_ diff --git a/projects/rdc/include/rdc_lib/rdc_common.h b/projects/rdc/include/rdc_lib/rdc_common.h index 46a1a01e1c..b459741f1b 100644 --- a/projects/rdc/include/rdc_lib/rdc_common.h +++ b/projects/rdc/include/rdc_lib/rdc_common.h @@ -20,12 +20,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef RDC_LIB_RDC_COMMON_H_ -#define RDC_LIB_RDC_COMMON_H_ +#ifndef INCLUDE_RDC_LIB_RDC_COMMON_H_ +#define INCLUDE_RDC_LIB_RDC_COMMON_H_ #include #include #include +#include "rdc/rdc.h" + #define RDC_ERROR 0 #define RDC_INFO 1 #define RDC_DEBUG 2 @@ -40,7 +42,10 @@ THE SOFTWARE. } while (0) // -typedef std::pair RdcFieldKey; +typedef std::pair RdcFieldKey; + +// +typedef std::pair RdcFieldGroupKey; //!< The gauge metrics do not require aggregations typedef std::map rdc_gpu_gauges_t; @@ -62,4 +67,4 @@ typedef std::map rdc_gpu_gauges_t; char *strncpy_with_null(char *dest, const char *src, size_t n); -#endif // RDC_LIB_RDC_COMMON_H_ +#endif // INCLUDE_RDC_LIB_RDC_COMMON_H_ diff --git a/projects/rdc/rdc_libs/CMakeLists.txt b/projects/rdc/rdc_libs/CMakeLists.txt index b42f4b19e2..aa5e7ed2a2 100755 --- a/projects/rdc/rdc_libs/CMakeLists.txt +++ b/projects/rdc/rdc_libs/CMakeLists.txt @@ -63,6 +63,7 @@ endif() set(SRC_DIR "${PROJECT_SOURCE_DIR}/rdc_libs") set(RDC_LIB_INC_DIR "${PROJECT_SOURCE_DIR}/include") +set(COMMON_DIR "${PROJECT_SOURCE_DIR}/common") ################# Determine the library version ######################### @@ -112,12 +113,13 @@ set(BOOTSTRAP_LIB_COMPONENT "lib${BOOTSTRAP_LIB}") set(BOOTSTRAP_LIB_SRC_LIST "${SRC_DIR}/bootstrap/src/RdcBootStrap.cc") set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLogger.cc") set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLibraryLoader.cc") +set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc") set(BOOTSTRAP_LIB_INC_LIST "${RDC_LIB_INC_DIR}/rdc/rdc.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/rdc_common.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcLogger.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcHandler.h") set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} 
"${RDC_LIB_INC_DIR}/rdc_lib/RdcLibraryLoader.h") - +set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h") message("BOOTSTRAP_LIB_INC_LIST=${BOOTSTRAP_LIB_INC_LIST}") add_library(${BOOTSTRAP_LIB} SHARED ${BOOTSTRAP_LIB_SRC_LIST} ${BOOTSTRAP_LIB_INC_LIST}) @@ -125,6 +127,7 @@ target_link_libraries(${BOOTSTRAP_LIB} pthread dl) target_include_directories(${BOOTSTRAP_LIB} PRIVATE "${PROJECT_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}/include" + "${COMMON_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/include") # TODO: set the properties for the library once we have one @@ -143,6 +146,7 @@ set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcGroupSettingsImp set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcCacheManagerImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcMetricsUpdaterImpl.cc") set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcWatchTableImpl.cc") +set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcEmbeddedHandler.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcMetricFetcher.h") @@ -155,6 +159,7 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcMetricsU set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcMetricsUpdaterImpl.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcWatchTable.h") set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcWatchTableImpl.h") +set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h") message("RDC_LIB_INC_LIST=${RDC_LIB_INC_LIST}") diff --git a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc index e078827fa2..b1640e64be 100644 --- a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc +++ 
b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc @@ -22,6 +22,7 @@ THE SOFTWARE. #include #include #include +#include "common/rdc_fields_supported.h" #include "rdc/rdc.h" #include "rdc_lib/RdcHandler.h" #include "rdc_lib/RdcLogger.h" @@ -204,7 +205,7 @@ rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, } rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle, - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { if (!p_rdc_handle || !field_ids || !field_group_name || !rdc_field_group_id) { @@ -270,7 +271,7 @@ rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle, } rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) { if (!p_rdc_handle || !value) { return RDC_ST_INVALID_HANDLER; } @@ -280,7 +281,7 @@ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, } rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle, - uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, + uint32_t gpu_index, rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!p_rdc_handle || !next_since_time_stamp || !value) { return RDC_ST_INVALID_HANDLER; @@ -350,30 +351,10 @@ const char* rdc_status_string(rdc_status_t result) { } } -const char* field_id_string(uint32_t field_id) { - const std::map id_name = { - {RDC_FI_GPU_MEMORY_USAGE, "GPU_MEMORY_USAGE"}, - {RDC_FI_GPU_MEMORY_TOTAL, "GPU_MEMORY_TOTAL"}, - {RDC_FI_POWER_USAGE, "POWER_USAGE"}, - {RDC_FI_GPU_CLOCK, "GPU_CLOCK"}, - {RDC_FI_GPU_UTIL, "GPU_UTIL"}, - {RDC_FI_GPU_TEMP, "GPU_TEMP"}, - {RDC_FI_GPU_COUNT, "GPU_COUNT"}, - {RDC_FI_MEM_CLOCK, "MEM_CLOCK"}, - {RDC_FI_PCIE_TX, "PCIE_TX"}, - {RDC_FI_PCIE_RX, "PCIE_RX"}, - {RDC_FI_ECC_CORRECT_TOTAL, "ECC_CORRECT"}, - 
{RDC_FI_ECC_UNCORRECT_TOTAL, "ECC_UNCORRECT"}, - {RDC_FI_MEMORY_TEMP, "MEMORY_TEMP"}, - {RDC_FI_DEV_NAME, "DEV_NAME"} - }; - - auto search = id_name.find(field_id); - if (search == id_name.end()) { - return "UNKNOWN_FIELD"; - } - - return search->second; +const char* field_id_string(rdc_field_t field_id) { // never returns null + const auto& id2desc = amd::rdc::get_field_id_description_from_id(); + const auto it = id2desc.find(field_id); // end() when id is not registered + return it == id2desc.end() ? "UNKNOWN_FIELD" : it->second.label.c_str(); } char *strncpy_with_null(char *dest, const char *src, size_t n) { diff --git a/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc index 7ba12e4413..0bfb0ca1bc 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc @@ -32,7 +32,7 @@ namespace amd { namespace rdc { rdc_status_t RdcCacheManagerImpl::rdc_field_get_value_since( - uint32_t gpu_index, uint32_t field_id, uint64_t since_time_stamp, + uint32_t gpu_index, rdc_field_t field_id, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!next_since_time_stamp || !value) { return RDC_ST_BAD_PARAMETER; @@ -72,7 +72,7 @@ rdc_status_t RdcCacheManagerImpl::rdc_field_get_value_since( rdc_status_t RdcCacheManagerImpl::evict_cache(uint32_t gpu_index, - uint32_t field_id, uint64_t max_keep_samples, double max_keep_age) { + rdc_field_t field_id, uint64_t max_keep_samples, double max_keep_age) { std::lock_guard guard(cache_mutex_); RdcFieldKey field{gpu_index, field_id}; @@ -108,7 +108,7 @@ rdc_status_t RdcCacheManagerImpl::evict_cache(uint32_t gpu_index, } rdc_status_t RdcCacheManagerImpl::rdc_field_get_latest_value( - uint32_t gpu_index, uint32_t field_id, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc
b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc index 43d271f972..2add0edd13 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc @@ -29,6 +29,7 @@ THE SOFTWARE. #include "rdc_lib/rdc_common.h" #include "rdc_lib/RdcLogger.h" #include "rdc_lib/RdcException.h" +#include "common/rdc_fields_supported.h" #include "rocm_smi/rocm_smi.h" namespace { @@ -259,7 +260,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id, } rdc_status_t RdcEmbeddedHandler::rdc_group_field_create(uint32_t num_field_ids, - uint32_t* field_ids, const char* field_group_name, + rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { if (!field_group_name || !rdc_field_group_id || !field_ids) { return RDC_ST_BAD_PARAMETER; @@ -268,7 +269,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_field_create(uint32_t num_field_ids, // Check the field is valid or not if (num_field_ids <= RDC_MAX_FIELD_IDS_PER_FIELD_GROUP) { for (uint32_t i = 0; i < num_field_ids; i++) { - if (!metric_fetcher_->is_field_valid(field_ids[i])) { + if (!is_field_valid(field_ids[i])) { RDC_LOG(RDC_INFO, "Fail to create field group with unknown field id " << field_ids[i]); @@ -341,11 +342,11 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_watch(rdc_gpu_group_t group_id, } rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value( - uint32_t gpu_index, uint32_t field, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } - if (!metric_fetcher_->is_field_valid(field)) { + if (!is_field_valid(field)) { RDC_LOG(RDC_INFO, "Fail to get latest value with unknown field id " << field); @@ -355,12 +356,12 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value( } rdc_status_t RdcEmbeddedHandler::rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t 
since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!next_since_time_stamp || !value) { return RDC_ST_BAD_PARAMETER; } - if (!metric_fetcher_->is_field_valid(field)) { + if (!is_field_valid(field)) { RDC_LOG(RDC_INFO, "Fail to get value since with unknown field id " << field); diff --git a/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc index ff7a7f7127..8b2fb102a4 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc @@ -29,7 +29,7 @@ namespace rdc { RdcGroupSettingsImpl::RdcGroupSettingsImpl() { // Add the default job stats fields - uint32_t job_fields[] = {RDC_FI_GPU_MEMORY_USAGE, + rdc_field_t job_fields[] = {RDC_FI_GPU_MEMORY_USAGE, RDC_FI_POWER_USAGE, RDC_FI_GPU_CLOCK, RDC_FI_GPU_UTIL, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, RDC_FI_MEM_CLOCK, RDC_FI_GPU_TEMP}; @@ -37,7 +37,7 @@ RdcGroupSettingsImpl::RdcGroupSettingsImpl() { rdc_field_grp_t fgid = JOB_FIELD_ID; rdc_group_field_create(sizeof(job_fields)/sizeof(uint32_t), - job_fields, job_field_group, &fgid); + job_fields, job_field_group, &fgid); } rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_create( @@ -133,7 +133,7 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_get_all_ids( } rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create( - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { RDC_LOG(RDC_DEBUG, "Create field group " << field_group_name); diff --git a/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc index 269dff471c..99802fa310 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc @@ -26,23 +26,13 @@ THE SOFTWARE. 
#include #include #include "rdc_lib/rdc_common.h" +#include "common/rdc_fields_supported.h" #include "rdc_lib/RdcLogger.h" #include "rocm_smi/rocm_smi.h" namespace amd { namespace rdc { -bool RdcMetricFetcherImpl::is_field_valid(uint32_t field_id) const { - const std::vector all_fields = {RDC_FI_GPU_MEMORY_USAGE, - RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_GPU_COUNT, RDC_FI_POWER_USAGE, - RDC_FI_GPU_CLOCK, RDC_FI_GPU_UTIL, RDC_FI_DEV_NAME, RDC_FI_GPU_TEMP, - RDC_FI_MEM_CLOCK, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, - RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL, RDC_FI_MEMORY_TEMP}; - - return std::find(all_fields.begin(), all_fields.end(), field_id) - != all_fields.end(); -} - RdcMetricFetcherImpl::RdcMetricFetcherImpl() { task_started_ = true; @@ -81,7 +71,7 @@ uint64_t RdcMetricFetcherImpl::now() { } void RdcMetricFetcherImpl::get_ecc_error(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) { + rdc_field_t field_id, rdc_field_value* value) { rsmi_status_t err = RSMI_STATUS_SUCCESS; uint64_t correctable_err = 0; uint64_t uncorrectable_err = 0; @@ -121,7 +111,7 @@ void RdcMetricFetcherImpl::get_ecc_error(uint32_t gpu_index, } bool RdcMetricFetcherImpl::async_get_pcie_throughput(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) { + rdc_field_t field_id, rdc_field_value* value) { if (!value) { return false; } @@ -216,7 +206,7 @@ void RdcMetricFetcherImpl::get_pcie_throughput(const RdcFieldKey& key) { } rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, - uint32_t field_id, rdc_field_value* value) { + rdc_field_t field_id, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc index d00310d9c2..cf7dd07a28 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc @@ -22,7 +22,7 @@ THE SOFTWARE. 
#include "rdc_lib/impl/RdcMetricsUpdaterImpl.h" #include #include -#include +#include // NOLINT(build/c++11) #include "rdc_lib/rdc_common.h" namespace amd { diff --git a/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc index 2114e7b25a..e70fbece9d 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcWatchTableImpl.cc @@ -176,7 +176,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) { std::lock_guard guard(watch_mutex_); - RdcFieldKey gkey({group_id, field_group_id}); + RdcFieldGroupKey gkey({group_id, field_group_id}); auto table_iter = watch_table_.find(gkey); // Already in the watch table @@ -234,7 +234,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_watch(rdc_gpu_group_t group_id, } rdc_status_t RdcWatchTableImpl::update_field_in_table_when_unwatch( - const RdcFieldKey& entry) { + const RdcFieldGroupKey& entry) { // Get individual fields for this unwatch std::vector fields; rdc_status_t result = get_fields_from_group( @@ -306,7 +306,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_unwatch( std::lock_guard guard(watch_mutex_); // Set is_watching = false - auto ite = watch_table_.find(RdcFieldKey({group_id, field_group_id})); + auto ite = watch_table_.find(RdcFieldGroupKey({group_id, field_group_id})); if (ite == watch_table_.end()) { return RDC_ST_NOT_FOUND; } @@ -318,7 +318,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_unwatch( } bool RdcWatchTableImpl::is_job_watch_field(uint32_t gpu_index, - uint32_t field_id, std::string& job_id) const { + rdc_field_t field_id, std::string& job_id) const { RdcFieldKey key{gpu_index, field_id}; for (auto ite = job_watch_table_.begin(); diff --git a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc index 4abaaa4318..0938fd1b6e 
100644 --- a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc +++ b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc @@ -290,7 +290,7 @@ rdc_status_t RdcStandaloneHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id, } rdc_status_t RdcStandaloneHandler::rdc_group_field_create( - uint32_t num_field_ids, uint32_t* field_ids, + uint32_t num_field_ids, rdc_field_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { if (!field_ids || !field_group_name || !rdc_field_group_id) { return RDC_ST_BAD_PARAMETER; @@ -339,7 +339,8 @@ rdc_status_t RdcStandaloneHandler::rdc_group_field_get_info( strncpy_with_null(field_group_info->group_name, reply.filed_group_name().c_str(), RDC_MAX_STR_LENGTH); for (int i = 0; i < reply.field_ids_size(); i++) { - field_group_info->field_ids[i] = reply.field_ids(i); + field_group_info->field_ids[i] = + static_cast(reply.field_ids(i)); } return RDC_ST_OK; @@ -471,7 +472,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_watch(rdc_gpu_group_t group_id, } rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value( - uint32_t gpu_index, uint32_t field, rdc_field_value* value) { + uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; } @@ -487,7 +488,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value( rdc_status_t err_status = error_handle(status, reply.status()); if (err_status != RDC_ST_OK) return err_status; - value->field_id = reply.field_id(); + value->field_id = static_cast(reply.field_id()); value->status = reply.rdc_status(); value->ts = reply.ts(); value->type = static_cast(reply.type()); @@ -504,7 +505,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value( } rdc_status_t RdcStandaloneHandler::rdc_field_get_value_since(uint32_t gpu_index, - uint32_t field, uint64_t since_time_stamp, + rdc_field_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if 
(!next_since_time_stamp || !value) { return RDC_ST_BAD_PARAMETER; @@ -522,7 +523,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_value_since(uint32_t gpu_index, rdc_status_t err_status = error_handle(status, reply.status()); if (err_status != RDC_ST_OK) return err_status; - value->field_id = reply.field_id(); + value->field_id = static_cast(reply.field_id()); value->status = reply.rdc_status(); value->ts = reply.ts(); value->type = static_cast(reply.type()); diff --git a/projects/rdc/rdci/CMakeLists.txt b/projects/rdc/rdci/CMakeLists.txt index 9ab8303ee9..4cf8543713 100644 --- a/projects/rdc/rdci/CMakeLists.txt +++ b/projects/rdc/rdci/CMakeLists.txt @@ -80,6 +80,8 @@ set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciFieldGroupSubSystem.cc") set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciDmonSubSystem.cc") set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciStatsSubSystem.cc") set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${PROJECT_SOURCE_DIR}/common/rdc_utils.cc") +set(RDCI_SRC_LIST ${RDCI_SRC_LIST} + "${PROJECT_SOURCE_DIR}/common/rdc_fields_supported.cc") message("RDCI_SRC_LIST=${RDCI_SRC_LIST}") set(RDCI_EXE "rdci") diff --git a/projects/rdc/rdci/include/RdciDmonSubSystem.h b/projects/rdc/rdci/include/RdciDmonSubSystem.h index ae3bb296e3..55169cc666 100644 --- a/projects/rdc/rdci/include/RdciDmonSubSystem.h +++ b/projects/rdc/rdci/include/RdciDmonSubSystem.h @@ -61,7 +61,7 @@ class RdciDmonSubSystem: public RdciSubSystem { }; std::map options_; - std::vector field_ids_; + std::vector field_ids_; std::vector gpu_indexes_; bool need_cleanup_; diff --git a/projects/rdc/rdci/include/RdciSubSystem.h b/projects/rdc/rdci/include/RdciSubSystem.h index 86a0b793dd..df8d65ff20 100644 --- a/projects/rdc/rdci/include/RdciSubSystem.h +++ b/projects/rdc/rdci/include/RdciSubSystem.h @@ -47,8 +47,6 @@ class RdciSubSystem { std::vector split_string(const std::string& s, char delimiter) const; void show_common_usage() const; - bool get_field_id_from_name(const std::string& name, - 
uint32_t& value) const; // NOLINT(runtime/references) rdc_handle_t rdc_handle_; std::string ip_port_; diff --git a/projects/rdc/rdci/src/RdciDmonSubSystem.cc b/projects/rdc/rdci/src/RdciDmonSubSystem.cc index 6c07ac4061..f4fe1fd394 100644 --- a/projects/rdc/rdci/src/RdciDmonSubSystem.cc +++ b/projects/rdc/rdci/src/RdciDmonSubSystem.cc @@ -25,8 +25,13 @@ THE SOFTWARE. #include #include #include +#include +#include +#include + #include "rdc_lib/rdc_common.h" #include "common/rdc_utils.h" +#include "common/rdc_fields_supported.h" #include "rdc/rdc.h" #include "rdc_lib/RdcException.h" @@ -147,14 +152,16 @@ void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) { std::vector vec_ids = split_string(field_ids, ','); for (uint32_t i = 0; i < vec_ids.size(); i++) { if (!IsNumber(vec_ids[i])) { - uint32_t field_id = 0; - if (!get_field_id_from_name(vec_ids[i], field_id)) { + rdc_field_t field_id = RDC_FI_INVALID; + if (!amd::rdc::get_field_id_from_name(vec_ids[i], + &field_id)) { throw RdcException(RDC_ST_BAD_PARAMETER, "The field name "+vec_ids[i]+" is not valid"); } field_ids_.push_back(field_id); } else { - field_ids_.push_back(std::stoi(vec_ids[i])); + field_ids_.push_back(static_cast( + std::stoi(vec_ids[i]))); } } } @@ -265,7 +272,7 @@ void RdciDmonSubSystem::create_temp_field_group() { const std::string field_group_name("rdci-dmon-field-group"); rdc_field_grp_t group_id; - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; for (uint32_t i = 0; i < field_ids_.size(); i++) { field_ids[i] = field_ids_[i]; } @@ -280,24 +287,22 @@ void RdciDmonSubSystem::create_temp_field_group() { options_.insert({OPTIONS_FIELD_GROUP_ID, group_id}); } + void RdciDmonSubSystem::show_field_usage() const { - std::cout << "Supported fields Ids:\n"; - std::cout << "100 RDC_FI_GPU_CLOCK: Current GPU clock frequencies.\n"; - std::cout << "101 RDC_FI_MEM_CLOCK: Current Memory clock frequencies.\n"; - std::cout << "140 
RDC_FI_MEMORY_TEMP: Memory " - << "temperature in millidegrees Celsius.\n"; - std::cout << "150 RDC_FI_GPU_TEMP: GPU " - << "temperature in millidegrees Celsius.\n"; - std::cout << "155 RDC_FI_POWER_USAGE: Power usage in microwatts.\n"; - std::cout << "200 RDC_FI_PCIE_TX: PCIe Tx utilization in bytes/second.\n"; - std::cout << "201 RDC_FI_PCIE_RX: PCIe Rx utilization in bytes/second.\n"; - std::cout << "203 RDC_FI_GPU_UTIL: GPU busy percentage.\n"; - std::cout << "312 RDC_FI_ECC_CORRECT_TOTAL: Accumulated " - << "correctable ECC errors.\n"; - std::cout << "313 RDC_FI_ECC_UNCORRECT_TOTAL: Accumulated " - << "uncorrectable ECC errors.\n"; - std::cout << "525 RDC_FI_GPU_MEMORY_USAGE: Memory usage of the GPU " - << "instance in bytes.\n"; + std::cout << "Supported fields Ids:" << std::endl; + + amd::rdc::fld_id2name_map_t &field_id_to_descript = + amd::rdc::get_field_id_description_from_id(); + for (auto i = field_id_to_descript.begin(); + i != field_id_to_descript.end(); i++) { + std::cout << i->first << " " << i->second.enum_name << " : " << + i->second.description << "." << std::endl; + } + std::cout << std::endl; + std::cout << "* Note: The field ID number associated with a field ID can " + "change" << std::endl; + std::cout << " from release to release. Field name strings should be " + "used in scripts." << std::endl; } void RdciDmonSubSystem::process() { diff --git a/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc b/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc index 6f9b8268af..a4a0a1295a 100644 --- a/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc +++ b/projects/rdc/rdci/src/RdciFieldGroupSubSystem.cc @@ -24,6 +24,7 @@ THE SOFTWARE. 
#include #include "rdc_lib/rdc_common.h" #include "common/rdc_utils.h" +#include "common/rdc_fields_supported.h" #include "rdc/rdc.h" #include "rdc_lib/RdcException.h" @@ -165,15 +166,16 @@ void RdciFieldGroupSubSystem::process() { "Must specify the group name when create a field group"); } std::vector fields = split_string(field_ids_, ','); - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; for (uint32_t i = 0; i < fields.size(); i++) { if (!IsNumber(fields[i])) { - if (!get_field_id_from_name(fields[i], field_ids[i])) { + if (!get_field_id_from_name(fields[i], &field_ids[i])) { throw RdcException(RDC_ST_BAD_PARAMETER, "The field name "+fields[i]+" is not valid"); } } else { - field_ids[i] = std::stoi(fields[i]); + field_ids[i] = + static_cast(std::stoi(fields[i])); } } rdc_field_grp_t group_id; diff --git a/projects/rdc/rdci/src/RdciSubSystem.cc b/projects/rdc/rdci/src/RdciSubSystem.cc index 7d98525865..0d4b03fb3a 100644 --- a/projects/rdc/rdci/src/RdciSubSystem.cc +++ b/projects/rdc/rdci/src/RdciSubSystem.cc @@ -23,6 +23,7 @@ THE SOFTWARE. 
#include #include "rdc_lib/RdcException.h" #include "common/rdc_utils.h" +#include "common/rdc_fields_supported.h" namespace amd { namespace rdc { @@ -45,34 +46,6 @@ bool RdciSubSystem::is_json_output() const { return is_json_output_; } -bool RdciSubSystem::get_field_id_from_name( - const std::string& name, uint32_t& value) const { - const std::map field_name_to_id = { - {"RDC_FI_GPU_MEMORY_USAGE", 525}, - {"RDC_FI_GPU_MEMORY_TOTAL", 580}, - {"RDC_FI_POWER_USAGE", 155}, - {"RDC_FI_GPU_CLOCK", 100}, - {"RDC_FI_MEM_CLOCK", 101}, - {"RDC_FI_PCIE_TX", 200}, - {"RDC_FI_PCIE_RX", 201}, - {"RDC_FI_GPU_UTIL", 203}, - {"RDC_FI_ECC_CORRECT_TOTAL", 312}, - {"RDC_FI_ECC_UNCORRECT_TOTAL", 313}, - {"RDC_FI_MEMORY_TEMP", 140}, - {"RDC_FI_GPU_TEMP", 150}, - {"RDC_FI_GPU_COUNT", 4}, - {"RDC_FI_DEV_NAME", 50} - }; - - auto id = field_name_to_id.find(name); - if (id == field_name_to_id.end()) { - return false; - } - - value = id->second; - return true; -} - std::vector RdciSubSystem::split_string(const std::string& s, char delimiter) const { std::vector tokens; diff --git a/projects/rdc/server/src/rdc_api_service.cc b/projects/rdc/server/src/rdc_api_service.cc index dd7e99bdcc..d798ff0199 100755 --- a/projects/rdc/server/src/rdc_api_service.cc +++ b/projects/rdc/server/src/rdc_api_service.cc @@ -245,9 +245,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { } rdc_field_grp_t field_group_id; - uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; for (int i = 0; i < request->field_ids_size(); i++) { - field_ids[i] = request->field_ids(i); + field_ids[i] = static_cast(request->field_ids(i)); } rdc_status_t result = rdc_group_field_create( rdc_handle_, request->field_ids_size() , &field_ids[0], @@ -331,8 +331,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { } rdc_field_value value; - rdc_status_t result = rdc_field_get_latest_value( - rdc_handle_, request->gpu_index(), request->field_id(), &value); + rdc_status_t result = 
rdc_field_get_latest_value(rdc_handle_, + request->gpu_index(), static_cast(request->field_id()), + &value); reply->set_status(result); if (result != RDC_ST_OK) { return ::grpc::Status::OK; @@ -365,9 +366,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { rdc_field_value value; uint64_t next_timestamp; - rdc_status_t result = rdc_field_get_value_since( - rdc_handle_, request->gpu_index(), request->field_id(), - request->since_time_stamp(), &next_timestamp, &value); + rdc_status_t result = rdc_field_get_value_since(rdc_handle_, + request->gpu_index(), static_cast(request->field_id()), + request->since_time_stamp(), &next_timestamp, &value); reply->set_status(result); if (result != RDC_ST_OK) { return ::grpc::Status::OK; diff --git a/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc b/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc index 4bdff76ec3..e255d4c3f6 100644 --- a/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc +++ b/projects/rdc/tests/rdc_tests/functional/rdci_discovery.cc @@ -33,7 +33,8 @@ THE SOFTWARE. 
TestRdciDiscovery::TestRdciDiscovery() : TestBase() { set_title("\tRDC Discovery Test"); - set_description("\tThe Discovery tests verifies that the GPUs are discovered and identified ."); + set_description("\tThe Discovery tests verifies that the GPUs are " + "discovered and identified ."); } TestRdciDiscovery::~TestRdciDiscovery(void) { @@ -58,17 +59,16 @@ void TestRdciDiscovery::DisplayResults(void) const { void TestRdciDiscovery::Close() { TestBase::Close(); rdc_status_t result; - if(standalone_){ + if (standalone_) { IF_VERB(STANDARD) { - std::cout << "\t**Disconnecting from host....\n" << std::endl; - } + std::cout << "\t**Disconnecting from host....\n" << std::endl; + } result = rdc_disconnect(rdc_handle); ASSERT_EQ(result, RDC_ST_OK); - } - else{ + } else { IF_VERB(STANDARD) { std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl; - } + } result = rdc_stop_embedded(rdc_handle); ASSERT_EQ(result, RDC_ST_OK); } @@ -77,20 +77,17 @@ void TestRdciDiscovery::Close() { ASSERT_EQ(result, RDC_ST_OK); } - void TestRdciDiscovery::Run(void) { - TestBase::Run(); rdc_status_t result; - if(standalone_){ + if (standalone_) { IF_VERB(STANDARD) { std::cout << "\t**Connecting to host....\n" << std::endl; } char hostIpAddress[] = {"localhost:50051"}; result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr); ASSERT_EQ(result, RDC_ST_OK); - } - else{ + } else { IF_VERB(STANDARD) { std::cout << "\t**Starting embedded RDC engine....\n" << std::endl; } @@ -121,17 +118,17 @@ void TestRdciDiscovery::Run(void) { rdc_device_attributes_t attribute; for (uint32_t i = 0; i < count; i++) { - result = rdc_device_get_attributes(0, gpu_index_list[i], &attribute); ASSERT_EQ(result, RDC_ST_INVALID_HANDLER); result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i], 0); ASSERT_EQ(result, RDC_ST_INVALID_HANDLER); - result = rdc_device_get_attributes(rdc_handle,gpu_index_list[i], &attribute); + result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i], 
+ &attribute); ASSERT_EQ(result, RDC_ST_OK); - std::cout << "\tGPU ID "<< i << " || " << attribute.device_name <